2019-01-12 04:58:27 +00:00
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cronjob
import (
"fmt"
"time"
"github.com/robfig/cron"
2020-08-10 17:43:49 +00:00
"k8s.io/klog/v2"
2019-01-12 04:58:27 +00:00
batchv1 "k8s.io/api/batch/v1"
batchv1beta1 "k8s.io/api/batch/v1beta1"
2020-12-01 01:06:26 +00:00
corev1 "k8s.io/api/core/v1"
2019-01-12 04:58:27 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
2020-12-01 01:06:26 +00:00
"k8s.io/client-go/tools/record"
2019-01-12 04:58:27 +00:00
)
// Utilities for dealing with Jobs and CronJobs and time.
2020-08-10 17:43:49 +00:00
func inActiveList ( cj batchv1beta1 . CronJob , uid types . UID ) bool {
for _ , j := range cj . Status . Active {
2019-01-12 04:58:27 +00:00
if j . UID == uid {
return true
}
}
return false
}
2020-08-10 17:43:49 +00:00
func deleteFromActiveList ( cj * batchv1beta1 . CronJob , uid types . UID ) {
if cj == nil {
2019-01-12 04:58:27 +00:00
return
}
2020-12-01 01:06:26 +00:00
// TODO: @alpatel the memory footprint can may be reduced here by
// cj.Status.Active = append(cj.Status.Active[:indexToRemove], cj.Status.Active[indexToRemove:]...)
newActive := [ ] corev1 . ObjectReference { }
2020-08-10 17:43:49 +00:00
for _ , j := range cj . Status . Active {
2019-01-12 04:58:27 +00:00
if j . UID != uid {
newActive = append ( newActive , j )
}
}
2020-08-10 17:43:49 +00:00
cj . Status . Active = newActive
2019-01-12 04:58:27 +00:00
}
// getParentUIDFromJob returns the UID of the CronJob controlling j. The
// boolean result is false when j has no controller reference, or when the
// controller reference is not of kind "CronJob" (which is logged at
// verbosity 4).
func getParentUIDFromJob(j batchv1.Job) (types.UID, bool) {
	owner := metav1.GetControllerOf(&j)
	switch {
	case owner == nil:
		return types.UID(""), false
	case owner.Kind != "CronJob":
		klog.V(4).Infof("Job with non-CronJob parent, name %s namespace %s", j.Name, j.Namespace)
		return types.UID(""), false
	}
	return owner.UID, true
}
2020-08-10 17:43:49 +00:00
// groupJobsByParent groups jobs into a map keyed by the job parent UID (e.g. cronJob).
2019-01-12 04:58:27 +00:00
// It has no receiver, to facilitate testing.
func groupJobsByParent ( js [ ] batchv1 . Job ) map [ types . UID ] [ ] batchv1 . Job {
2020-08-10 17:43:49 +00:00
jobsByCj := make ( map [ types . UID ] [ ] batchv1 . Job )
2019-01-12 04:58:27 +00:00
for _ , job := range js {
parentUID , found := getParentUIDFromJob ( job )
if ! found {
klog . V ( 4 ) . Infof ( "Unable to get parent uid from job %s in namespace %s" , job . Name , job . Namespace )
continue
}
2020-08-10 17:43:49 +00:00
jobsByCj [ parentUID ] = append ( jobsByCj [ parentUID ] , job )
2019-01-12 04:58:27 +00:00
}
2020-08-10 17:43:49 +00:00
return jobsByCj
2019-01-12 04:58:27 +00:00
}
// getRecentUnmetScheduleTimes gets a slice of times (from oldest to latest) that have passed when a Job should have started but did not.
//
// If there are too many (>100) unstarted times, just give up and return an empty slice.
// If there were missed times prior to the last known start time, then those are not returned.
2020-08-10 17:43:49 +00:00
func getRecentUnmetScheduleTimes ( cj batchv1beta1 . CronJob , now time . Time ) ( [ ] time . Time , error ) {
2019-01-12 04:58:27 +00:00
starts := [ ] time . Time { }
2020-08-10 17:43:49 +00:00
sched , err := cron . ParseStandard ( cj . Spec . Schedule )
2019-01-12 04:58:27 +00:00
if err != nil {
2020-08-10 17:43:49 +00:00
return starts , fmt . Errorf ( "unparseable schedule: %s : %s" , cj . Spec . Schedule , err )
2019-01-12 04:58:27 +00:00
}
var earliestTime time . Time
2020-08-10 17:43:49 +00:00
if cj . Status . LastScheduleTime != nil {
earliestTime = cj . Status . LastScheduleTime . Time
2019-01-12 04:58:27 +00:00
} else {
2020-08-10 17:43:49 +00:00
// If none found, then this is either a recently created cronJob,
2019-01-12 04:58:27 +00:00
// or the active/completed info was somehow lost (contract for status
// in kubernetes says it may need to be recreated), or that we have
// started a job, but have not noticed it yet (distributed systems can
// have arbitrary delays). In any case, use the creation time of the
// CronJob as last known start time.
2020-08-10 17:43:49 +00:00
earliestTime = cj . ObjectMeta . CreationTimestamp . Time
2019-01-12 04:58:27 +00:00
}
2020-08-10 17:43:49 +00:00
if cj . Spec . StartingDeadlineSeconds != nil {
2019-01-12 04:58:27 +00:00
// Controller is not going to schedule anything below this point
2020-08-10 17:43:49 +00:00
schedulingDeadline := now . Add ( - time . Second * time . Duration ( * cj . Spec . StartingDeadlineSeconds ) )
2019-01-12 04:58:27 +00:00
if schedulingDeadline . After ( earliestTime ) {
earliestTime = schedulingDeadline
}
}
if earliestTime . After ( now ) {
return [ ] time . Time { } , nil
}
for t := sched . Next ( earliestTime ) ; ! t . After ( now ) ; t = sched . Next ( t ) {
starts = append ( starts , t )
// An object might miss several starts. For example, if
// controller gets wedged on friday at 5:01pm when everyone has
// gone home, and someone comes in on tuesday AM and discovers
// the problem and restarts the controller, then all the hourly
2020-08-10 17:43:49 +00:00
// jobs, more than 80 of them for one hourly cronJob, should
// all start running with no further intervention (if the cronJob
2019-01-12 04:58:27 +00:00
// allows concurrency and late starts).
//
// However, if there is a bug somewhere, or incorrect clock
// on controller's server or apiservers (for setting creationTimestamp)
// then there could be so many missed start times (it could be off
// by decades or more), that it would eat up all the CPU and memory
// of this controller. In that case, we want to not try to list
// all the missed start times.
//
// I've somewhat arbitrarily picked 100, as more than 80,
// but less than "lots".
if len ( starts ) > 100 {
// We can't get the most recent times so just return an empty slice
2019-08-30 18:33:25 +00:00
return [ ] time . Time { } , fmt . Errorf ( "too many missed start time (> 100). Set or decrease .spec.startingDeadlineSeconds or check clock skew" )
2019-01-12 04:58:27 +00:00
}
}
return starts , nil
}
2020-12-01 01:06:26 +00:00
// getUnmetScheduleTimes gets the slice of all the missed times (from oldest to latest) from
// the time a job was last scheduled up to `now`.
//
// The search starts at cj.Status.LastScheduleTime, falling back to the CronJob's creation
// timestamp when no last schedule time is recorded. When .spec.startingDeadlineSeconds is set
// and now minus that deadline is later than the starting point, the search starts there instead.
//
// If there are too many (>100) unstarted times, a warning event is recorded and a message is
// logged, but the full list of missed times is still returned.
//
// If the schedule stops reporting activation times (schedule.Next returns the zero time),
// the search ends and the times collected so far are returned.
func getUnmetScheduleTimes(cj batchv1beta1.CronJob, now time.Time, schedule cron.Schedule, recorder record.EventRecorder) []time.Time {
	starts := []time.Time{}

	var earliestTime time.Time
	if cj.Status.LastScheduleTime != nil {
		earliestTime = cj.Status.LastScheduleTime.Time
	} else {
		// If none found, then this is either a recently created cronJob,
		// or the active/completed info was somehow lost (contract for status
		// in kubernetes says it may need to be recreated), or that we have
		// started a job, but have not noticed it yet (distributed systems can
		// have arbitrary delays). In any case, use the creation time of the
		// CronJob as last known start time.
		earliestTime = cj.ObjectMeta.CreationTimestamp.Time
	}
	if cj.Spec.StartingDeadlineSeconds != nil {
		// Controller is not going to schedule anything below this point
		schedulingDeadline := now.Add(-time.Second * time.Duration(*cj.Spec.StartingDeadlineSeconds))

		if schedulingDeadline.After(earliestTime) {
			earliestTime = schedulingDeadline
		}
	}
	if earliestTime.After(now) {
		return []time.Time{}
	}

	// t := schedule.Next(earliestTime)
	// t1 := schedule.Next(t)
	// delta := t1 - t
	// missed := now - earliestTime/delta
	// last missed = earliestTime + delta * (missed - 1)
	// TODO: @alpatel, convert the following for loop into above logic and add test cases
	for t := schedule.Next(earliestTime); !t.After(now); t = schedule.Next(t) {
		// The cron library returns the zero time when it cannot find any
		// activation time for the schedule (e.g. "0 0 31 2 *"). The zero time
		// is never after now, so without this check the loop would never
		// terminate and starts would grow without bound.
		if t.IsZero() {
			break
		}
		starts = append(starts, t)
	}
	if len(starts) > 100 {
		// An object might miss several starts. For example, if
		// controller gets wedged on friday at 5:01pm when everyone has
		// gone home, and someone comes in on tuesday AM and discovers
		// the problem and restarts the controller, then all the hourly
		// jobs, more than 80 of them for one hourly cronJob, should
		// all start running with no further intervention (if the cronJob
		// allows concurrency and late starts).
		//
		// However, if there is a bug somewhere, or incorrect clock
		// on controller's server or apiservers (for setting creationTimestamp)
		// then there could be a very large number of missed start times
		// (it could be off by decades or more). All of them are still
		// returned; the event and log line below only flag the situation.
		//
		// I've somewhat arbitrarily picked 100, as more than 80,
		// but less than "lots".
		recorder.Eventf(&cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times: %d. Set or decrease .spec.startingDeadlineSeconds or check clock skew", len(starts))
		klog.InfoS("too many missed times", "cronjob", klog.KRef(cj.GetNamespace(), cj.GetName()), "missed times", len(starts))
	}
	return starts
}
2019-01-12 04:58:27 +00:00
// getJobFromTemplate makes a Job from a CronJob
2020-08-10 17:43:49 +00:00
func getJobFromTemplate ( cj * batchv1beta1 . CronJob , scheduledTime time . Time ) ( * batchv1 . Job , error ) {
labels := copyLabels ( & cj . Spec . JobTemplate )
annotations := copyAnnotations ( & cj . Spec . JobTemplate )
2019-01-12 04:58:27 +00:00
// We want job names for a given nominal start time to have a deterministic name to avoid the same job being created twice
2020-08-10 17:43:49 +00:00
name := fmt . Sprintf ( "%s-%d" , cj . Name , getTimeHash ( scheduledTime ) )
2019-01-12 04:58:27 +00:00
job := & batchv1 . Job {
ObjectMeta : metav1 . ObjectMeta {
Labels : labels ,
Annotations : annotations ,
Name : name ,
2020-08-10 17:43:49 +00:00
OwnerReferences : [ ] metav1 . OwnerReference { * metav1 . NewControllerRef ( cj , controllerKind ) } ,
2019-01-12 04:58:27 +00:00
} ,
}
2020-08-10 17:43:49 +00:00
cj . Spec . JobTemplate . Spec . DeepCopyInto ( & job . Spec )
2019-01-12 04:58:27 +00:00
return job , nil
}
// getTimeHash returns scheduledTime expressed as whole seconds since the
// Unix epoch.
func getTimeHash(scheduledTime time.Time) int64 {
	epochSeconds := scheduledTime.Unix()
	return epochSeconds
}
2020-12-01 01:06:26 +00:00
// getJobFromTemplate2 builds a Job from the CronJob's job template, naming it
// with getJobName. Per the original design note, the name is based on the
// scheduled time in minutes since the epoch, because the cronjob_controller
// v2 has the lowest granularity of 1 minute for scheduling job.
//
// The job's labels and annotations come from copyLabels and copyAnnotations
// applied to the template, its spec is a deep copy of the template spec, and
// cj is set as its controller owner. The returned error is always nil.
func getJobFromTemplate2(cj *batchv1beta1.CronJob, scheduledTime time.Time) (*batchv1.Job, error) {
	tmpl := &cj.Spec.JobTemplate
	meta := metav1.ObjectMeta{
		Labels:      copyLabels(tmpl),
		Annotations: copyAnnotations(tmpl),
		// The name is derived from the nominal start time so that it is
		// deterministic, which avoids the same job being created twice.
		Name:            getJobName(cj, scheduledTime),
		OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(cj, controllerKind)},
	}
	job := &batchv1.Job{ObjectMeta: meta}
	tmpl.Spec.DeepCopyInto(&job.Spec)
	return job, nil
}
// getTimeHashInMinutes returns scheduledTime expressed as minutes since the
// Unix epoch: the Unix seconds count integer-divided by 60.
func getTimeHashInMinutes(scheduledTime time.Time) int64 {
	const secondsPerMinute = 60
	return scheduledTime.Unix() / secondsPerMinute
}
2019-01-12 04:58:27 +00:00
func getFinishedStatus ( j * batchv1 . Job ) ( bool , batchv1 . JobConditionType ) {
for _ , c := range j . Status . Conditions {
2020-12-01 01:06:26 +00:00
if ( c . Type == batchv1 . JobComplete || c . Type == batchv1 . JobFailed ) && c . Status == corev1 . ConditionTrue {
2019-01-12 04:58:27 +00:00
return true , c . Type
}
}
return false , ""
}
2019-08-30 18:33:25 +00:00
// IsJobFinished returns whether or not a job has completed successfully or failed.
2019-01-12 04:58:27 +00:00
func IsJobFinished ( j * batchv1 . Job ) bool {
isFinished , _ := getFinishedStatus ( j )
return isFinished
}
// byJobStartTime sorts a list of jobs by start timestamp, using their names as a tie breaker.
type byJobStartTime [ ] batchv1 . Job
func ( o byJobStartTime ) Len ( ) int { return len ( o ) }
func ( o byJobStartTime ) Swap ( i , j int ) { o [ i ] , o [ j ] = o [ j ] , o [ i ] }
func ( o byJobStartTime ) Less ( i , j int ) bool {
2019-04-07 17:07:55 +00:00
if o [ i ] . Status . StartTime == nil && o [ j ] . Status . StartTime != nil {
return false
}
if o [ i ] . Status . StartTime != nil && o [ j ] . Status . StartTime == nil {
return true
2019-01-12 04:58:27 +00:00
}
if o [ i ] . Status . StartTime . Equal ( o [ j ] . Status . StartTime ) {
return o [ i ] . Name < o [ j ] . Name
}
return o [ i ] . Status . StartTime . Before ( o [ j ] . Status . StartTime )
}
2020-12-01 01:06:26 +00:00
// byJobStartTimeStar sorts a list of job pointers by start timestamp, using their names as a
// tie breaker. Jobs without a start time sort after jobs that have one.
type byJobStartTimeStar []*batchv1.Job

// Len returns the number of jobs in the list.
func (o byJobStartTimeStar) Len() int {
	return len(o)
}

// Swap exchanges the jobs at positions i and j.
func (o byJobStartTimeStar) Swap(i, j int) {
	o[j], o[i] = o[i], o[j]
}

// Less reports whether the job at i orders before the job at j.
func (o byJobStartTimeStar) Less(i, j int) bool {
	left, right := o[i].Status.StartTime, o[j].Status.StartTime
	switch {
	case left == nil && right != nil:
		return false
	case left != nil && right == nil:
		return true
	case left.Equal(right):
		// Equal start times are ordered by name.
		return o[i].Name < o[j].Name
	}
	return left.Before(right)
}