|
|
85aed4e |
diff --git a/build/cmake/CondorConfigure.cmake b/build/cmake/CondorConfigure.cmake
|
|
|
85aed4e |
index e61fb4f..1094cb3 100644
|
|
|
85aed4e |
--- a/build/cmake/CondorConfigure.cmake
|
|
|
85aed4e |
+++ b/build/cmake/CondorConfigure.cmake
|
|
|
85aed4e |
@@ -164,6 +164,7 @@ if( NOT WINDOWS)
|
|
|
85aed4e |
check_function_exists("setlinebuf" HAVE_SETLINEBUF)
|
|
|
85aed4e |
check_function_exists("snprintf" HAVE_SNPRINTF)
|
|
|
85aed4e |
check_function_exists("snprintf" HAVE_WORKING_SNPRINTF)
|
|
|
85aed4e |
+ check_function_exists("eventfd" HAVE_EVENTFD)
|
|
|
85aed4e |
|
|
|
85aed4e |
check_function_exists("stat64" HAVE_STAT64)
|
|
|
85aed4e |
check_function_exists("_stati64" HAVE__STATI64)
|
|
|
85aed4e |
diff --git a/src/condor_includes/config.h.cmake b/src/condor_includes/config.h.cmake
|
|
|
85aed4e |
index b083945..3bd92b0 100644
|
|
|
85aed4e |
--- a/src/condor_includes/config.h.cmake
|
|
|
85aed4e |
+++ b/src/condor_includes/config.h.cmake
|
|
|
85aed4e |
@@ -438,6 +438,9 @@
|
|
|
85aed4e |
/* Define to 1 if you have the 'snprintf' function. (USED)*/
|
|
|
85aed4e |
#cmakedefine HAVE_SNPRINTF 1
|
|
|
85aed4e |
|
|
|
85aed4e |
+/* Define to 1 if you have the 'eventfd' function. (USED)*/
|
|
|
85aed4e |
+#cmakedefine HAVE_EVENTFD 1
|
|
|
85aed4e |
+
|
|
|
85aed4e |
/* Define to 1 if you have the 'stat64' function. (USED)*/
|
|
|
85aed4e |
#cmakedefine HAVE_STAT64 1
|
|
|
85aed4e |
|
|
|
85aed4e |
diff --git a/src/condor_starter.V6.1/vanilla_proc.cpp b/src/condor_starter.V6.1/vanilla_proc.cpp
|
|
|
85aed4e |
index 2e5538f..0246e5e 100644
|
|
|
85aed4e |
--- a/src/condor_starter.V6.1/vanilla_proc.cpp
|
|
|
85aed4e |
+++ b/src/condor_starter.V6.1/vanilla_proc.cpp
|
|
|
85aed4e |
@@ -42,9 +42,16 @@
|
|
|
85aed4e |
extern dynuser* myDynuser;
|
|
|
85aed4e |
#endif
|
|
|
85aed4e |
|
|
|
85aed4e |
+#if defined(HAVE_EVENTFD)
|
|
|
85aed4e |
+#include <sys/eventfd.h>
|
|
|
85aed4e |
+#endif
|
|
|
85aed4e |
+
|
|
|
85aed4e |
extern CStarter *Starter;
|
|
|
85aed4e |
|
|
|
85aed4e |
-VanillaProc::VanillaProc(ClassAd* jobAd) : OsProc(jobAd)
|
|
|
85aed4e |
+VanillaProc::VanillaProc(ClassAd* jobAd) : OsProc(jobAd),
|
|
|
85aed4e |
+ m_memory_limit(-1),
|
|
|
85aed4e |
+ m_oom_fd(-1),
|
|
|
85aed4e |
+ m_oom_efd(-1)
|
|
|
85aed4e |
{
|
|
|
85aed4e |
#if !defined(WIN32)
|
|
|
85aed4e |
m_escalation_tid = -1;
|
|
|
85aed4e |
@@ -215,6 +222,12 @@ VanillaProc::StartJob()
|
|
|
85aed4e |
}
|
|
|
85aed4e |
fi.group_ptr = &tracking_gid;
|
|
|
85aed4e |
}
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Increase the OOM score of this process; the child will inherit it.
|
|
|
85aed4e |
+ // This way, the job will be heavily preferred to be killed over a normal process.
|
|
|
85aed4e |
+ // OOM score is currently exponential - a score of 4 is a factor-16 increase in
|
|
|
85aed4e |
+ // the OOM score.
|
|
|
85aed4e |
+ setupOOMScore(4);
|
|
|
85aed4e |
#endif
|
|
|
85aed4e |
|
|
|
85aed4e |
#if defined(HAVE_EXT_LIBCGROUP)
|
|
|
85aed4e |
@@ -406,6 +419,7 @@ VanillaProc::StartJob()
|
|
|
85aed4e |
int MemMb;
|
|
|
85aed4e |
if (MachineAd->LookupInteger(ATTR_MEMORY, MemMb)) {
|
|
|
85aed4e |
uint64_t MemMb_big = MemMb;
|
|
|
85aed4e |
+ m_memory_limit = MemMb_big;
|
|
|
85aed4e |
climits.set_memory_limit_bytes(1024*1024*MemMb_big, mem_is_soft);
|
|
|
85aed4e |
} else {
|
|
|
85aed4e |
dprintf(D_ALWAYS, "Not setting memory soft limit in cgroup because "
|
|
|
85aed4e |
@@ -425,6 +439,14 @@ VanillaProc::StartJob()
|
|
|
85aed4e |
} else {
|
|
|
85aed4e |
dprintf(D_FULLDEBUG, "Invalid value of SlotWeight in machine ClassAd; ignoring.\n");
|
|
|
85aed4e |
}
|
|
|
85aed4e |
+ setupOOMEvent(cgroup);
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Now that the job is started, decrease the likelihood that the starter
|
|
|
85aed4e |
+ // is killed instead of the job itself.
|
|
|
85aed4e |
+ if (retval)
|
|
|
85aed4e |
+ {
|
|
|
85aed4e |
+ setupOOMScore(-4);
|
|
|
85aed4e |
}
|
|
|
85aed4e |
|
|
|
85aed4e |
#endif
|
|
|
85aed4e |
@@ -611,5 +633,226 @@ VanillaProc::finishShutdownFast()
|
|
|
85aed4e |
// -gquinn, 2007-11-14
|
|
|
85aed4e |
daemonCore->Kill_Family(JobPid);
|
|
|
85aed4e |
|
|
|
85aed4e |
+ if (m_oom_efd >= 0) {
|
|
|
85aed4e |
+ dprintf(D_FULLDEBUG, "Closing event FD pipe in shutdown %d.\n", m_oom_efd);
|
|
|
85aed4e |
+ daemonCore->Close_Pipe(m_oom_efd);
|
|
|
85aed4e |
+ m_oom_efd = -1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ if (m_oom_fd >= 0) {
|
|
|
85aed4e |
+ close(m_oom_fd);
|
|
|
85aed4e |
+ m_oom_fd = -1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
return false; // shutdown is pending, so return false
|
|
|
85aed4e |
}
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+/*
|
|
|
85aed4e |
+ * This will be called when the event fd fires, indicating an OOM event.
|
|
|
85aed4e |
+ */
|
|
|
85aed4e |
+int
|
|
|
85aed4e |
+VanillaProc::outOfMemoryEvent(int /* fd */)
|
|
|
85aed4e |
+{
|
|
|
85aed4e |
+ std::stringstream ss;
|
|
|
85aed4e |
+ if (m_memory_limit >= 0) {
|
|
|
85aed4e |
+ ss << "Job has gone over memory limit of " << m_memory_limit << " megabytes.";
|
|
|
85aed4e |
+ } else {
|
|
|
85aed4e |
+ ss << "Job has encountered an out-of-memory event.";
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ Starter->jic->holdJob(ss.str().c_str(), CONDOR_HOLD_CODE_JobOutOfResources, 0);
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // this will actually clean up the job
|
|
|
85aed4e |
+ if ( Starter->Hold( ) ) {
|
|
|
85aed4e |
+ dprintf( D_FULLDEBUG, "All jobs were removed due to OOM event.\n" );
|
|
|
85aed4e |
+ Starter->allJobsDone();
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ dprintf(D_FULLDEBUG, "Closing event FD pipe %d.\n", m_oom_efd);
|
|
|
85aed4e |
+ daemonCore->Close_Pipe(m_oom_efd);
|
|
|
85aed4e |
+ close(m_oom_fd);
|
|
|
85aed4e |
+ m_oom_efd = -1;
|
|
|
85aed4e |
+ m_oom_fd = -1;
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ Starter->ShutdownFast();
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+}
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+int
|
|
|
85aed4e |
+VanillaProc::setupOOMScore(int new_score)
|
|
|
85aed4e |
+{
|
|
|
85aed4e |
+#if !defined(LINUX)
|
|
|
85aed4e |
+ if (new_score) // Done to suppress compiler warnings.
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+#else
|
|
|
85aed4e |
+ TemporaryPrivSentry sentry(PRIV_ROOT);
|
|
|
85aed4e |
+ // oom_adj is deprecated on modern kernels and causes a deprecation warning when used.
|
|
|
85aed4e |
+ int oom_score_fd = open("/proc/self/oom_score_adj", O_WRONLY);
|
|
|
85aed4e |
+ if (oom_score_fd == -1) {
|
|
|
85aed4e |
+ if (errno != ENOENT) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to open oom_score_adj for the starter: (errno=%u, %s)\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ } else {
|
|
|
85aed4e |
+ int oom_score_fd = open("/proc/self/oom_adj", O_WRONLY);
|
|
|
85aed4e |
+ if (oom_score_fd == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to open oom_adj for the starter: (errno=%u, %s)\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ } else {
|
|
|
85aed4e |
+ // oom_score_adj is linear; oom_adj was exponential.
|
|
|
85aed4e |
+ if (new_score > 0)
|
|
|
85aed4e |
+ new_score = 1 << new_score;
|
|
|
85aed4e |
+ else
|
|
|
85aed4e |
+ new_score = -(1 << -new_score);
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ std::stringstream ss;
|
|
|
85aed4e |
+ ss << new_score;
|
|
|
85aed4e |
+ std::string new_score_str = ss.str();
|
|
|
85aed4e |
+ ssize_t nwritten = full_write(oom_score_fd, new_score_str.c_str(), new_score_str.length());
|
|
|
85aed4e |
+ if (nwritten < 0) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to write into oom_adj file for the starter: (errno=%u, %s)\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ close(oom_score_fd);
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ close(oom_score_fd);
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+#endif
|
|
|
85aed4e |
+}
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+int
|
|
|
85aed4e |
+VanillaProc::setupOOMEvent(const std::string &cgroup_string)
|
|
|
85aed4e |
+{
|
|
|
85aed4e |
+#if !(defined(HAVE_EVENTFD) && defined(HAVE_EXT_LIBCGROUP))
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+#else
|
|
|
85aed4e |
+ // Initialize the event descriptor
|
|
|
85aed4e |
+ m_oom_efd = eventfd(0, EFD_CLOEXEC);
|
|
|
85aed4e |
+ if (m_oom_efd == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to create new event FD for starter: %u %s\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Find the memcg location on disk
|
|
|
85aed4e |
+ void * handle = NULL;
|
|
|
85aed4e |
+ struct cgroup_mount_point mount_info;
|
|
|
85aed4e |
+ int ret = cgroup_get_controller_begin(&handle, &mount_info);
|
|
|
85aed4e |
+ std::stringstream oom_control;
|
|
|
85aed4e |
+ std::stringstream event_control;
|
|
|
85aed4e |
+ bool found_memcg = false;
|
|
|
85aed4e |
+ while (ret == 0) {
|
|
|
85aed4e |
+ if (strcmp(mount_info.name, MEMORY_CONTROLLER_STR) == 0) {
|
|
|
85aed4e |
+ found_memcg = true;
|
|
|
85aed4e |
+ oom_control << mount_info.path << "/";
|
|
|
85aed4e |
+ event_control << mount_info.path << "/";
|
|
|
85aed4e |
+ break;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ cgroup_get_controller_next(&handle, &mount_info);
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ if (!found_memcg && (ret != ECGEOF)) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Error while locating memcg controller for starter: %u %s\n",
|
|
|
85aed4e |
+ ret, cgroup_strerror(ret));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ cgroup_get_controller_end(&handle);
|
|
|
85aed4e |
+ if (found_memcg == false) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Memcg is not available; OOM notification disabled for starter.\n");
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Finish constructing the location of the control files
|
|
|
85aed4e |
+ oom_control << cgroup_string << "/memory.oom_control";
|
|
|
85aed4e |
+ std::string oom_control_str = oom_control.str();
|
|
|
85aed4e |
+ event_control << cgroup_string << "/cgroup.event_control";
|
|
|
85aed4e |
+ std::string event_control_str = event_control.str();
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Open the oom_control and event control files
|
|
|
85aed4e |
+ TemporaryPrivSentry sentry(PRIV_ROOT);
|
|
|
85aed4e |
+ m_oom_fd = open(oom_control_str.c_str(), O_RDONLY | O_CLOEXEC);
|
|
|
85aed4e |
+ if (m_oom_fd == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to open the OOM control file for starter: %u %s\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ int event_ctrl_fd = open(event_control_str.c_str(), O_WRONLY | O_CLOEXEC);
|
|
|
85aed4e |
+ if (event_ctrl_fd == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to open event control for starter: %u %s\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Inform Linux we will be handling the OOM events for this container.
|
|
|
85aed4e |
+ int oom_fd2 = open(oom_control_str.c_str(), O_WRONLY | O_CLOEXEC);
|
|
|
85aed4e |
+ if (oom_fd2 == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to open the OOM control file for writing for starter: %u %s\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ const char limits [] = "1";
|
|
|
85aed4e |
+ ssize_t nwritten = full_write(oom_fd2, &limits, 1);
|
|
|
85aed4e |
+ if (nwritten < 0) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to set OOM control to %s for starter: %u %s\n",
|
|
|
85aed4e |
+ limits, errno, strerror(errno));
|
|
|
85aed4e |
+ close(event_ctrl_fd);
|
|
|
85aed4e |
+ close(oom_fd2);
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ close(oom_fd2);
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Create the subscription string:
|
|
|
85aed4e |
+ std::stringstream sub_ss;
|
|
|
85aed4e |
+ sub_ss << m_oom_efd << " " << m_oom_fd;
|
|
|
85aed4e |
+ std::string sub_str = sub_ss.str();
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ if ((nwritten = full_write(event_ctrl_fd, sub_str.c_str(), sub_str.size())) < 0) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS,
|
|
|
85aed4e |
+ "Unable to write into event control file for starter: %u %s\n",
|
|
|
85aed4e |
+ errno, strerror(errno));
|
|
|
85aed4e |
+ close(event_ctrl_fd);
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ close(event_ctrl_fd);
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Fool DC into talking to the eventfd
|
|
|
85aed4e |
+ int pipes[2]; pipes[0] = -1; pipes[1] = -1;
|
|
|
85aed4e |
+ int fd_to_replace = -1;
|
|
|
85aed4e |
+ if (daemonCore->Create_Pipe(pipes, true) == -1 || pipes[0] == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS, "Unable to create a DC pipe\n");
|
|
|
85aed4e |
+ close(m_oom_efd);
|
|
|
85aed4e |
+ m_oom_efd = -1;
|
|
|
85aed4e |
+ close(m_oom_fd);
|
|
|
85aed4e |
+ m_oom_fd = -1;
|
|
|
85aed4e |
+ return 1;
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ if ( daemonCore->Get_Pipe_FD(pipes[0], &fd_to_replace) == -1 || fd_to_replace == -1) {
|
|
|
85aed4e |
+ dprintf(D_ALWAYS, "Unable to lookup pipe's FD\n");
|
|
|
85aed4e |
+ close(m_oom_efd); m_oom_efd = -1;
|
|
|
85aed4e |
+ close(m_oom_fd); m_oom_fd = -1;
|
|
|
85aed4e |
+ daemonCore->Close_Pipe(pipes[0]);
|
|
|
85aed4e |
+ daemonCore->Close_Pipe(pipes[1]);
|
|
|
85aed4e |
+ }
|
|
|
85aed4e |
+ dup3(m_oom_efd, fd_to_replace, O_CLOEXEC);
|
|
|
85aed4e |
+ close(m_oom_efd);
|
|
|
85aed4e |
+ m_oom_efd = pipes[0];
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Inform DC we want to receive notifications from this FD.
|
|
|
85aed4e |
+ daemonCore->Register_Pipe(pipes[0],"OOM event fd", static_cast<PipeHandlercpp>(&VanillaProc::outOfMemoryEvent),"OOM Event Handler",this,HANDLE_READ);
|
|
|
85aed4e |
+ return 0;
|
|
|
85aed4e |
+#endif
|
|
|
85aed4e |
+}
|
|
|
85aed4e |
+
|
|
|
85aed4e |
diff --git a/src/condor_starter.V6.1/vanilla_proc.h b/src/condor_starter.V6.1/vanilla_proc.h
|
|
|
85aed4e |
index d524cf5..90b4741 100644
|
|
|
85aed4e |
--- a/src/condor_starter.V6.1/vanilla_proc.h
|
|
|
85aed4e |
+++ b/src/condor_starter.V6.1/vanilla_proc.h
|
|
|
85aed4e |
@@ -74,6 +74,15 @@ private:
|
|
|
85aed4e |
#if !defined(WIN32)
|
|
|
85aed4e |
int m_escalation_tid;
|
|
|
85aed4e |
#endif
|
|
|
85aed4e |
+
|
|
|
85aed4e |
+ // Configure OOM killer for this job
|
|
|
85aed4e |
+ int m_memory_limit; // Memory limit, in MB.
|
|
|
85aed4e |
+ int m_oom_fd; // The file descriptor which receives events
|
|
|
85aed4e |
+ int m_oom_efd; // The event FD to watch
|
|
|
85aed4e |
+ int setupOOMScore(int new_score);
|
|
|
85aed4e |
+ int outOfMemoryEvent(int fd);
|
|
|
85aed4e |
+ int setupOOMEvent(const std::string & cgroup_string);
|
|
|
85aed4e |
+
|
|
|
85aed4e |
};
|
|
|
85aed4e |
|
|
|
85aed4e |
#endif
|
|
|
85aed4e |
diff --git a/src/condor_utils/condor_holdcodes.h b/src/condor_utils/condor_holdcodes.h
|
|
|
85aed4e |
index d788d6e..3083db3 100644
|
|
|
85aed4e |
--- a/src/condor_utils/condor_holdcodes.h
|
|
|
85aed4e |
+++ b/src/condor_utils/condor_holdcodes.h
|
|
|
85aed4e |
@@ -128,4 +128,6 @@ const int CONDOR_HOLD_CODE_GlexecChownSandboxToCondor = 30;
|
|
|
85aed4e |
|
|
|
85aed4e |
const int CONDOR_HOLD_CODE_PrivsepChownSandboxToCondor = 31;
|
|
|
85aed4e |
|
|
|
85aed4e |
+const int CONDOR_HOLD_CODE_JobOutOfResources = 32;
|
|
|
85aed4e |
+
|
|
|
85aed4e |
#endif
|