Blob Blame History Raw
diff --git a/src/condor_c-gahp/schedd_client.cpp b/src/condor_c-gahp/schedd_client.cpp
index e3acc44..ba96490 100644
--- a/src/condor_c-gahp/schedd_client.cpp
+++ b/src/condor_c-gahp/schedd_client.cpp
@@ -173,7 +173,10 @@ doContactSchedd()
 		}
 	}
 
-	
+	int interaction_time = param_integer("CGAHP_SCHEDD_INTERACTION_TIME", 5);
+	time_t starttime = time(NULL);
+	bool rerun_immediately = false;
+
 	SchedDRequest::schedd_command_type commands [] = {
 		SchedDRequest::SDC_REMOVE_JOB,
 		SchedDRequest::SDC_HOLD_JOB,
@@ -188,7 +191,11 @@ doContactSchedd()
 	int i=0;
 	while (i<3) {
 		
-		
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		StringList id_list;
 		SimpleList <SchedDRequest*> this_batch;
 
@@ -361,6 +368,12 @@ doContactSchedd()
 
 	SimpleList <SchedDRequest*> stage_in_batch;
 	do {
+
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		stage_in_batch.Clear();
 
 		command_queue.Rewind();
@@ -507,6 +520,11 @@ doContactSchedd()
 		if (current_command->command != SchedDRequest::SDC_JOB_REFRESH_PROXY)
 			continue;
 
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		time_t expiration_time = GetDesiredDelegatedJobCredentialExpiration(current_command->classad);
 		time_t result_expiration_time = 0;
 
@@ -591,7 +609,12 @@ doContactSchedd()
 
 		if (qmgr_connection == NULL)
 			goto update_report_result;
-		
+
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		error = FALSE;
 		errno = 0;
 		BeginTransaction();
@@ -631,7 +654,8 @@ doContactSchedd()
 					if( SetAttribute(current_command->cluster_id,
 											current_command->proc_id,
 											lhstr,
-											rhstr) == -1 ) {
+											rhstr,
+											SetAttribute_NoAck) == -1 ) {
 						if ( errno == ETIMEDOUT ) {
 							failure_line_num = __LINE__;
 							failure_errno = errno;
@@ -698,6 +722,11 @@ update_report_result:
 		if (current_command->command != SchedDRequest::SDC_UPDATE_LEASE)
 			continue;
 
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		std::string success_job_ids="";
 		if (qmgr_connection == NULL) {
 			sprintf( error_msg, "Error connecting to schedd %s", ScheddAddr );
@@ -803,6 +832,11 @@ update_report_result:
 		if (current_command->command != SchedDRequest::SDC_SUBMIT_JOB)
 			continue;
 
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		int ClusterId = -1;
 		int ProcId = -1;
 
@@ -929,7 +963,8 @@ update_report_result:
 					error = TRUE;
 				} else if( SetAttribute (ClusterId, ProcId,
 											lhstr,
-											rhstr) == -1 ) {
+											rhstr,
+											SetAttribute_NoAck) == -1 ) {
 					if ( errno == ETIMEDOUT ) {
 						failure_line_num = __LINE__;
 						failure_errno = errno;
@@ -993,6 +1028,11 @@ submit_report_result:
 		if (current_command->command != SchedDRequest::SDC_STATUS_CONSTRAINED)
 			continue;
 
+		if (time(NULL) - starttime > interaction_time) {
+			rerun_immediately = true;
+			break;
+		}
+
 		if (qmgr_connection != NULL) {
 			SimpleList <MyString *> matching_ads;
 
@@ -1165,9 +1205,14 @@ submit_report_result:
 		}
 	}
 
+	dprintf (D_FULLDEBUG, "Schedd interaction took %ld seconds.\n", time(NULL)-starttime);
+	if (rerun_immediately) {
+		dprintf (D_FULLDEBUG, "Schedd interaction time hit limit; will retry immediately.\n");
+	}
+
 	// Come back soon..
 	// QUESTION: Should this always be a fixed time period?
-	daemonCore->Reset_Timer( contactScheddTid, contact_schedd_interval );
+	daemonCore->Reset_Timer( contactScheddTid, rerun_immediately ? 1 : contact_schedd_interval );
 }