condor_procd: cgroup CPU baseline and block IO statistics reset

Adds ProcFamily::get_cpu_usage_cgroup(), records the CPU usage already
present in the cgroup when set_cgroup() runs so that it is subtracted from
later readings, resets the blkio statistics at that point (releasing the
temporary cgroup object afterwards), and stops calling
update_max_image_size_cgroup() from aggregate_usage_cgroup().

diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp
index d35ffcc..29d9471 100644
--- a/src/condor_procd/proc_family.cpp
+++ b/src/condor_procd/proc_family.cpp
@@ -54,7 +54,9 @@ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,
 	m_member_list(NULL)
 #if defined(HAVE_EXT_LIBCGROUP)
 	, m_cgroup_string(""),
-	m_cm(CgroupManager::getInstance())
+	m_cm(CgroupManager::getInstance()),
+	m_initial_user_cpu(0),
+	m_initial_sys_cpu(0)
 #endif
 {
 #if !defined(WIN32)
@@ -188,6 +190,7 @@ after_migrate:
 		cgroup_free(&orig_cgroup);
 	}
 
+
 after_restore:
 	if (orig_cgroup_string != NULL) {
 		free(orig_cgroup_string);
@@ -231,6 +234,29 @@ ProcFamily::set_cgroup(const std::string &cgroup_string)
 		member = member->m_next;
 	}
 
+	// Record the amount of pre-existing CPU usage here.
+	m_initial_user_cpu = 0;
+	m_initial_sys_cpu = 0;
+	get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);
+
+	// Reset block IO controller
+	if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
+		struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
+		struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
+		ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
+		cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
+		int err;
+		if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
+			// Not allowed to reset stats?
+			dprintf(D_ALWAYS,
+				"Unable to reset cgroup %s block IO statistics. "
+				"Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
+				m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
+		}
+		// Release the cgroup object allocated by cgroup_new_cgroup() above.
+		cgroup_free(&tmp_cgroup);
+	}
+
 	return 0;
 }
 
@@ -486,6 +512,51 @@ ProcFamily::aggregate_usage_cgroup_blockio(ProcFamilyUsage* usage)
 	return 0;
 }
 
+// Read the "user" and "system" counters from this family's cpuacct cgroup
+// statistics, divide each by clock_tick, subtract the baseline recorded in
+// m_initial_user_cpu / m_initial_sys_cpu, and store the results in
+// user_time and sys_time.  A counter that is not found leaves its output
+// argument untouched.
+//
+// set_cgroup() zeroes the baselines and then passes them in as the output
+// arguments, which is how the pre-existing usage gets captured.
+//
+// Returns 0 on success; 1 if the cpuacct controller is not mounted or the
+// statistics could not be read to the end.
+int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) {
+
+	if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) {
+		return 1;
+	}
+
+	void * handle = NULL;
+	u_int64_t tmp = 0;
+	struct cgroup_stat stats;
+	int err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
+	while (err != ECGEOF) {
+		if (err > 0) {
+			dprintf(D_PROCFAMILY,
+				"Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
+				m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
+			break;
+		}
+		if (_check_stat_uint64(stats, "user", &tmp)) {
+			user_time = tmp/clock_tick-m_initial_user_cpu;
+		} else if (_check_stat_uint64(stats, "system", &tmp)) {
+			sys_time = tmp/clock_tick-m_initial_sys_cpu;
+		}
+		err = cgroup_read_stats_next(&handle, &stats);
+	}
+	if (handle != NULL) {
+		cgroup_read_stats_end(&handle);
+	}
+	if (err != ECGEOF) {
+		dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(err));
+		return 1;
+	}
+	return 0;
+}
+
 int
 ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 {
@@ -496,16 +567,13 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 
 	int err;
 	struct cgroup_stat stats;
-	void **handle;
+	void *handle = NULL;
 	u_int64_t tmp = 0, image = 0;
 	bool found_rss = false;
 
 	// Update memory
-	handle = (void **)malloc(sizeof(void*));
-	ASSERT (handle != NULL);
-	*handle = NULL;
 
-	err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
+	err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
 	while (err != ECGEOF) {
 		if (err > 0) {
 			dprintf(D_PROCFAMILY,
@@ -522,10 +590,10 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 		} else if (_check_stat_uint64(stats, "total_swap", &tmp)) {
 			image += tmp;
 		}
-		err = cgroup_read_stats_next(handle, &stats);
+		err = cgroup_read_stats_next(&handle, &stats);
 	}
-	if (*handle != NULL) {
-		cgroup_read_stats_end(handle);
+	if (handle != NULL) {
+		cgroup_read_stats_end(&handle);
 	}
 	if (found_rss) {
 		usage->total_image_size = image/1024;
@@ -540,29 +608,12 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 		m_max_image_size = image/1024;
 	}
 	// Try updating the max size using cgroups
-	update_max_image_size_cgroup();
+	// XXX: This is taken out for now - kernel calculates max INCLUDING
+	// the filesystem cache. Not what you want.
+	//update_max_image_size_cgroup();
 
 	// Update CPU
-	*handle = NULL;
-	err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
-	while (err != ECGEOF) {
-		if (err > 0) {
-			dprintf(D_PROCFAMILY,
-				"Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
-				m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
-			break;
-		}
-		if (_check_stat_uint64(stats, "user", &tmp)) {
-			usage->user_cpu_time = tmp/clock_tick;
-		} else if (_check_stat_uint64(stats, "system", &tmp)) {
-			usage->sys_cpu_time = tmp/clock_tick;
-		}
-		err = cgroup_read_stats_next(handle, &stats);
-	}
-	if (*handle != NULL) {
-		cgroup_read_stats_end(handle);
-	}
-	free(handle);
+	get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);
 
 	aggregate_usage_cgroup_blockio(usage);
 
--- a/src/condor_procd/proc_family.h
+++ b/src/condor_procd/proc_family.h
@@ -181,6 +181,11 @@ private:
 	std::string m_cgroup_string;
 	CgroupManager &m_cm;
 	static long clock_tick;
+	// Sometimes Condor doesn't successfully clear out the cgroup from the
+	// previous run. Hence, we subtract off any CPU usage found at the
+	// start of the job.
+	long m_initial_user_cpu;
+	long m_initial_sys_cpu;
 	static bool have_warned_about_memsw;
 
 	int count_tasks_cgroup();
@@ -190,5 +195,6 @@ private:
 	int spree_cgroup(int);
 	int migrate_to_cgroup(pid_t);
 	void update_max_image_size_cgroup();
+	int get_cpu_usage_cgroup(long &user_cpu, long &sys_cpu);
 #endif
 };