Tree - rpms/condor - src.fedoraproject.org

rpms / condor

Blame cgroup_reset_stats.patch

Blob History Raw

		b3fe293	`diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp`
		b3fe293	`index d35ffcc..29d9471 100644`
		b3fe293	`--- a/src/condor_procd/proc_family.cpp`
		b3fe293	`+++ b/src/condor_procd/proc_family.cpp`
		b3fe293	`@@ -54,7 +54,9 @@ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,`
		b3fe293	`m_member_list(NULL)`
		b3fe293	`#if defined(HAVE_EXT_LIBCGROUP)`
		b3fe293	`, m_cgroup_string(""),`
		b3fe293	`- m_cm(CgroupManager::getInstance())`
		b3fe293	`+ m_cm(CgroupManager::getInstance()),`
		b3fe293	`+ m_initial_user_cpu(0),`
		b3fe293	`+ m_initial_sys_cpu(0)`
		b3fe293	`#endif`
		b3fe293	`{`
		b3fe293	`#if !defined(WIN32)`
		b3fe293	`@@ -188,6 +190,7 @@ after_migrate:`
		b3fe293	`cgroup_free(&orig_cgroup);`
		b3fe293	`}`
		b3fe293
		b3fe293	`+`
		b3fe293	`after_restore:`
		b3fe293	`if (orig_cgroup_string != NULL) {`
		b3fe293	`free(orig_cgroup_string);`
		b3fe293	`@@ -231,6 +234,27 @@ ProcFamily::set_cgroup(const std::string &cgroup_string)`
		b3fe293	`member = member->m_next;`
		b3fe293	`}`
		b3fe293
		b3fe293	`+ // Record the amount of pre-existing CPU usage here.`
		b3fe293	`+ m_initial_user_cpu = 0;`
		b3fe293	`+ m_initial_sys_cpu = 0;`
		b3fe293	`+ get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);`
		b3fe293	`+`
		b3fe293	`+ // Reset block IO controller`
		b3fe293	`+ if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {`
		b3fe293	`+ struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());`
		b3fe293	`+ struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);`
		b3fe293	`+ ASSERT (blkio_controller != NULL); // Block IO controller should already exist.`
		b3fe293	`+ cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);`
		b3fe293	`+ int err;`
		b3fe293	`+ if ((err = cgroup_modify_cgroup(tmp_cgroup))) {`
		b3fe293	`+ // Not allowed to reset stats?`
		b3fe293	`+ dprintf(D_ALWAYS,`
		b3fe293	`+ "Unable to reset cgroup %s block IO statistics. "`
		b3fe293	`+ "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",`
		b3fe293	`+ m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));`
		b3fe293	`+ }`
		b3fe293	`+ }`
		b3fe293	`+`
		b3fe293	`return 0;`
		b3fe293	`}`
		b3fe293
		b3fe293	`@@ -486,6 +510,40 @@ ProcFamily::aggregate_usage_cgroup_blockio(ProcFamilyUsage* usage)`
		b3fe293	`return 0;`
		b3fe293	`}`
		b3fe293
		b3fe293	`+int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) {`
		b3fe293	`+ /* Reads the "user" and "system" cpuacct stats of cgroup m_cgroup_string into user_time / sys_time: each raw value is divided by clock_tick and then reduced by the matching m_initial_*_cpu baseline. Returns 0 when the stats were read through to ECGEOF, 1 otherwise. */`
		b3fe293	`+ if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) {`
		b3fe293	`+ return 1; /* cpuacct controller not mounted: user_time and sys_time are left untouched */`
		b3fe293	`+ }`
		b3fe293	`+ /* NOTE: set_cgroup() zeroes m_initial_user_cpu / m_initial_sys_cpu and then passes those same members as the out-params, so on that call the subtraction below removes zero and the members end up holding the current readings as the baseline. */`
		b3fe293	`+ void * handle = NULL;`
		b3fe293	`+ u_int64_t tmp = 0;`
		b3fe293	`+ struct cgroup_stat stats;`
		b3fe293	`+ int err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);`
		b3fe293	`+ while (err != ECGEOF) { /* keep stepping through stat entries until libcgroup returns ECGEOF */`
		b3fe293	`+ if (err > 0) {`
		b3fe293	`+ dprintf(D_PROCFAMILY,`
		b3fe293	`+ "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",`
		b3fe293	`+ m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));`
		b3fe293	`+ break; /* err is not ECGEOF here, so the check after the loop logs again at D_ALWAYS and returns 1 */`
		b3fe293	`+ }`
		b3fe293	`+ if (_check_stat_uint64(stats, "user", &tmp)) {`
		b3fe293	`+ user_time = tmp/clock_tick-m_initial_user_cpu; /* presumably seconds, assuming clock_tick is ticks per second - TODO confirm */`
		b3fe293	`+ } else if (_check_stat_uint64(stats, "system", &tmp)) {`
		b3fe293	`+ sys_time = tmp/clock_tick-m_initial_sys_cpu;`
		b3fe293	`+ }`
		b3fe293	`+ err = cgroup_read_stats_next(&handle, &stats);`
		b3fe293	`+ }`
		b3fe293	`+ if (handle != NULL) { /* handle starts out NULL and is only ever handed to the read_stats calls by address */`
		b3fe293	`+ cgroup_read_stats_end(&handle);`
		b3fe293	`+ }`
		b3fe293	`+ if (err != ECGEOF) {`
		b3fe293	`+ dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(err));`
		b3fe293	`+ return 1; /* either output may already have been overwritten before the failure */`
		b3fe293	`+ }`
		b3fe293	`+ return 0;`
		b3fe293	`+}`
		b3fe293	`+`
		b3fe293	`int`
		b3fe293	`ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)`
		b3fe293	`{`
		b3fe293	`@@ -496,16 +554,13 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)`
		b3fe293
		b3fe293	`int err;`
		b3fe293	`struct cgroup_stat stats;`
		b3fe293	`- void **handle;`
		b3fe293	`+ void *handle = NULL;`
		b3fe293	`u_int64_t tmp = 0, image = 0;`
		b3fe293	`bool found_rss = false;`
		b3fe293
		b3fe293	`// Update memory`
		b3fe293	`- handle = (void *)malloc(sizeof(void));`
		b3fe293	`- ASSERT (handle != NULL);`
		b3fe293	`- *handle = NULL;`
		b3fe293
		b3fe293	`- err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);`
		b3fe293	`+ err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);`
		b3fe293	`while (err != ECGEOF) {`
		b3fe293	`if (err > 0) {`
		b3fe293	`dprintf(D_PROCFAMILY,`
		b3fe293	`@@ -522,10 +577,10 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)`
		b3fe293	`} else if (_check_stat_uint64(stats, "total_swap", &tmp)) {`
		b3fe293	`image += tmp;`
		b3fe293	`}`
		b3fe293	`- err = cgroup_read_stats_next(handle, &stats);`
		b3fe293	`+ err = cgroup_read_stats_next(&handle, &stats);`
		b3fe293	`}`
		b3fe293	`- if (*handle != NULL) {`
		b3fe293	`- cgroup_read_stats_end(handle);`
		b3fe293	`+ if (handle != NULL) {`
		b3fe293	`+ cgroup_read_stats_end(&handle);`
		b3fe293	`}`
		b3fe293	`if (found_rss) {`
		b3fe293	`usage->total_image_size = image/1024;`
		b3fe293	`@@ -540,29 +595,12 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)`
		b3fe293	`m_max_image_size = image/1024;`
		b3fe293	`}`
		b3fe293	`// Try updating the max size using cgroups`
		b3fe293	`- update_max_image_size_cgroup();`
		b3fe293	`+ // XXX: This is taken out for now - kernel calculates max INCLUDING`
		b3fe293	`+ // the filesystem cache. Not what you want.`
		b3fe293	`+ //update_max_image_size_cgroup();`
		b3fe293
		b3fe293	`// Update CPU`
		b3fe293	`- *handle = NULL;`
		b3fe293	`- err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);`
		b3fe293	`- while (err != ECGEOF) {`
		b3fe293	`- if (err > 0) {`
		b3fe293	`- dprintf(D_PROCFAMILY,`
		b3fe293	`- "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",`
		b3fe293	`- m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));`
		b3fe293	`- break;`
		b3fe293	`- }`
		b3fe293	`- if (_check_stat_uint64(stats, "user", &tmp)) {`
		b3fe293	`- usage->user_cpu_time = tmp/clock_tick;`
		b3fe293	`- } else if (_check_stat_uint64(stats, "system", &tmp)) {`
		b3fe293	`- usage->sys_cpu_time = tmp/clock_tick;`
		b3fe293	`- }`
		b3fe293	`- err = cgroup_read_stats_next(handle, &stats);`
		b3fe293	`- }`
		b3fe293	`- if (*handle != NULL) {`
		b3fe293	`- cgroup_read_stats_end(handle);`
		b3fe293	`- }`
		b3fe293	`- free(handle);`
		b3fe293	`+ get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);`
		b3fe293
		b3fe293	`aggregate_usage_cgroup_blockio(usage);`
		b3fe293
		b3fe293	`--- a/src/condor_procd/proc_family.h`
		b3fe293	`+++ b/src/condor_procd/proc_family.h`
		b3fe293	`@@ -181,6 +181,11 @@ private:`
		b3fe293	`std::string m_cgroup_string;`
		b3fe293	`CgroupManager &m_cm;`
		b3fe293	`static long clock_tick;`
		b3fe293	`+ // Sometimes Condor doesn't successfully clear out the cgroup from the`
		b3fe293	`+ // previous run. Hence, we subtract off any CPU usage found at the`
		b3fe293	`+ // start of the job.`
		b3fe293	`+ long m_initial_user_cpu;`
		b3fe293	`+ long m_initial_sys_cpu;`
		b3fe293	`static bool have_warned_about_memsw;`
		b3fe293
		b3fe293	`int count_tasks_cgroup();`
		b3fe293	`@@ -190,6 +195,7 @@ private:`
		b3fe293	`int spree_cgroup(int);`
		b3fe293	`int migrate_to_cgroup(pid_t);`
		b3fe293	`void update_max_image_size_cgroup();`
		b3fe293	`+ int get_cpu_usage_cgroup(long &user_cpu, long &sys_cpu);`
		b3fe293	`#endif`
		b3fe293	`};`
		b3fe293

rpms / condor

Source Code

Blame cgroup_reset_stats.patch