2019-01-12 04:58:27 +00:00
|
|
|
/*
|
|
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package cronjob
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/robfig/cron"
|
2020-08-10 17:43:49 +00:00
|
|
|
"k8s.io/klog/v2"
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
batchv1 "k8s.io/api/batch/v1"
|
|
|
|
batchv1beta1 "k8s.io/api/batch/v1beta1"
|
|
|
|
"k8s.io/api/core/v1"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
"k8s.io/apimachinery/pkg/types"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Utilities for dealing with Jobs and CronJobs and time.
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
func inActiveList(cj batchv1beta1.CronJob, uid types.UID) bool {
|
|
|
|
for _, j := range cj.Status.Active {
|
2019-01-12 04:58:27 +00:00
|
|
|
if j.UID == uid {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
func deleteFromActiveList(cj *batchv1beta1.CronJob, uid types.UID) {
|
|
|
|
if cj == nil {
|
2019-01-12 04:58:27 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
newActive := []v1.ObjectReference{}
|
2020-08-10 17:43:49 +00:00
|
|
|
for _, j := range cj.Status.Active {
|
2019-01-12 04:58:27 +00:00
|
|
|
if j.UID != uid {
|
|
|
|
newActive = append(newActive, j)
|
|
|
|
}
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
cj.Status.Active = newActive
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// getParentUIDFromJob extracts UID of job's parent and whether it was found
|
|
|
|
func getParentUIDFromJob(j batchv1.Job) (types.UID, bool) {
|
|
|
|
controllerRef := metav1.GetControllerOf(&j)
|
|
|
|
|
|
|
|
if controllerRef == nil {
|
|
|
|
return types.UID(""), false
|
|
|
|
}
|
|
|
|
|
|
|
|
if controllerRef.Kind != "CronJob" {
|
|
|
|
klog.V(4).Infof("Job with non-CronJob parent, name %s namespace %s", j.Name, j.Namespace)
|
|
|
|
return types.UID(""), false
|
|
|
|
}
|
|
|
|
|
|
|
|
return controllerRef.UID, true
|
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
// groupJobsByParent groups jobs into a map keyed by the job parent UID (e.g. cronJob).
|
2019-01-12 04:58:27 +00:00
|
|
|
// It has no receiver, to facilitate testing.
|
|
|
|
func groupJobsByParent(js []batchv1.Job) map[types.UID][]batchv1.Job {
|
2020-08-10 17:43:49 +00:00
|
|
|
jobsByCj := make(map[types.UID][]batchv1.Job)
|
2019-01-12 04:58:27 +00:00
|
|
|
for _, job := range js {
|
|
|
|
parentUID, found := getParentUIDFromJob(job)
|
|
|
|
if !found {
|
|
|
|
klog.V(4).Infof("Unable to get parent uid from job %s in namespace %s", job.Name, job.Namespace)
|
|
|
|
continue
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
jobsByCj[parentUID] = append(jobsByCj[parentUID], job)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
return jobsByCj
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// getRecentUnmetScheduleTimes gets a slice of times (from oldest to latest) that have passed when a Job should have started but did not.
|
|
|
|
//
|
|
|
|
// If there are too many (>100) unstarted times, just give up and return an empty slice.
|
|
|
|
// If there were missed times prior to the last known start time, then those are not returned.
|
2020-08-10 17:43:49 +00:00
|
|
|
func getRecentUnmetScheduleTimes(cj batchv1beta1.CronJob, now time.Time) ([]time.Time, error) {
|
2019-01-12 04:58:27 +00:00
|
|
|
starts := []time.Time{}
|
2020-08-10 17:43:49 +00:00
|
|
|
sched, err := cron.ParseStandard(cj.Spec.Schedule)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
2020-08-10 17:43:49 +00:00
|
|
|
return starts, fmt.Errorf("unparseable schedule: %s : %s", cj.Spec.Schedule, err)
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var earliestTime time.Time
|
2020-08-10 17:43:49 +00:00
|
|
|
if cj.Status.LastScheduleTime != nil {
|
|
|
|
earliestTime = cj.Status.LastScheduleTime.Time
|
2019-01-12 04:58:27 +00:00
|
|
|
} else {
|
2020-08-10 17:43:49 +00:00
|
|
|
// If none found, then this is either a recently created cronJob,
|
2019-01-12 04:58:27 +00:00
|
|
|
// or the active/completed info was somehow lost (contract for status
|
|
|
|
// in kubernetes says it may need to be recreated), or that we have
|
|
|
|
// started a job, but have not noticed it yet (distributed systems can
|
|
|
|
// have arbitrary delays). In any case, use the creation time of the
|
|
|
|
// CronJob as last known start time.
|
2020-08-10 17:43:49 +00:00
|
|
|
earliestTime = cj.ObjectMeta.CreationTimestamp.Time
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
if cj.Spec.StartingDeadlineSeconds != nil {
|
2019-01-12 04:58:27 +00:00
|
|
|
// Controller is not going to schedule anything below this point
|
2020-08-10 17:43:49 +00:00
|
|
|
schedulingDeadline := now.Add(-time.Second * time.Duration(*cj.Spec.StartingDeadlineSeconds))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
if schedulingDeadline.After(earliestTime) {
|
|
|
|
earliestTime = schedulingDeadline
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if earliestTime.After(now) {
|
|
|
|
return []time.Time{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for t := sched.Next(earliestTime); !t.After(now); t = sched.Next(t) {
|
|
|
|
starts = append(starts, t)
|
|
|
|
// An object might miss several starts. For example, if
|
|
|
|
// controller gets wedged on friday at 5:01pm when everyone has
|
|
|
|
// gone home, and someone comes in on tuesday AM and discovers
|
|
|
|
// the problem and restarts the controller, then all the hourly
|
2020-08-10 17:43:49 +00:00
|
|
|
// jobs, more than 80 of them for one hourly cronJob, should
|
|
|
|
// all start running with no further intervention (if the cronJob
|
2019-01-12 04:58:27 +00:00
|
|
|
// allows concurrency and late starts).
|
|
|
|
//
|
|
|
|
// However, if there is a bug somewhere, or incorrect clock
|
|
|
|
// on controller's server or apiservers (for setting creationTimestamp)
|
|
|
|
// then there could be so many missed start times (it could be off
|
|
|
|
// by decades or more), that it would eat up all the CPU and memory
|
|
|
|
// of this controller. In that case, we want to not try to list
|
|
|
|
// all the missed start times.
|
|
|
|
//
|
|
|
|
// I've somewhat arbitrarily picked 100, as more than 80,
|
|
|
|
// but less than "lots".
|
|
|
|
if len(starts) > 100 {
|
|
|
|
// We can't get the most recent times so just return an empty slice
|
2019-08-30 18:33:25 +00:00
|
|
|
return []time.Time{}, fmt.Errorf("too many missed start time (> 100). Set or decrease .spec.startingDeadlineSeconds or check clock skew")
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return starts, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getJobFromTemplate makes a Job from a CronJob
|
2020-08-10 17:43:49 +00:00
|
|
|
func getJobFromTemplate(cj *batchv1beta1.CronJob, scheduledTime time.Time) (*batchv1.Job, error) {
|
|
|
|
labels := copyLabels(&cj.Spec.JobTemplate)
|
|
|
|
annotations := copyAnnotations(&cj.Spec.JobTemplate)
|
2019-01-12 04:58:27 +00:00
|
|
|
// We want job names for a given nominal start time to have a deterministic name to avoid the same job being created twice
|
2020-08-10 17:43:49 +00:00
|
|
|
name := fmt.Sprintf("%s-%d", cj.Name, getTimeHash(scheduledTime))
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
job := &batchv1.Job{
|
|
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
|
|
Labels: labels,
|
|
|
|
Annotations: annotations,
|
|
|
|
Name: name,
|
2020-08-10 17:43:49 +00:00
|
|
|
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(cj, controllerKind)},
|
2019-01-12 04:58:27 +00:00
|
|
|
},
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
cj.Spec.JobTemplate.Spec.DeepCopyInto(&job.Spec)
|
2019-01-12 04:58:27 +00:00
|
|
|
return job, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getTimeHash converts scheduledTime to its Unix timestamp, i.e. the number
// of whole seconds elapsed since the Unix epoch.
func getTimeHash(scheduledTime time.Time) int64 {
	epochSeconds := scheduledTime.Unix()
	return epochSeconds
}
|
|
|
|
|
|
|
|
func getFinishedStatus(j *batchv1.Job) (bool, batchv1.JobConditionType) {
|
|
|
|
for _, c := range j.Status.Conditions {
|
|
|
|
if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == v1.ConditionTrue {
|
|
|
|
return true, c.Type
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false, ""
|
|
|
|
}
|
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
// IsJobFinished returns whether or not a job has completed successfully or failed.
|
2019-01-12 04:58:27 +00:00
|
|
|
func IsJobFinished(j *batchv1.Job) bool {
|
|
|
|
isFinished, _ := getFinishedStatus(j)
|
|
|
|
return isFinished
|
|
|
|
}
|
|
|
|
|
|
|
|
// byJobStartTime sorts a list of jobs by start timestamp, using their names as a tie breaker.
|
|
|
|
type byJobStartTime []batchv1.Job
|
|
|
|
|
|
|
|
func (o byJobStartTime) Len() int { return len(o) }
|
|
|
|
func (o byJobStartTime) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
|
|
|
|
|
|
|
|
func (o byJobStartTime) Less(i, j int) bool {
|
2019-04-07 17:07:55 +00:00
|
|
|
if o[i].Status.StartTime == nil && o[j].Status.StartTime != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if o[i].Status.StartTime != nil && o[j].Status.StartTime == nil {
|
|
|
|
return true
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
if o[i].Status.StartTime.Equal(o[j].Status.StartTime) {
|
|
|
|
return o[i].Name < o[j].Name
|
|
|
|
}
|
|
|
|
return o[i].Status.StartTime.Before(o[j].Status.StartTime)
|
|
|
|
}
|