2015-06-10 20:58:22 +00:00
|
|
|
/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
|
|
|
|
package podtask
|
|
|
|
|
|
|
|
import (
|
|
|
|
"container/ring"
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
log "github.com/golang/glog"
|
|
|
|
mesos "github.com/mesos/mesos-go/mesosproto"
|
2015-08-05 22:05:17 +00:00
|
|
|
"k8s.io/kubernetes/contrib/mesos/pkg/scheduler/metrics"
|
|
|
|
"k8s.io/kubernetes/pkg/api"
|
2015-06-10 20:58:22 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// PodPath is presumably the path prefix used for pod resources; confirm
// against callers.
//
// TODO(jdef) move this somewhere else
const PodPath = "/pods"

// defaultFinishedTasksSize is the length of the historical record of
// finished tasks.
const defaultFinishedTasksSize = 1024
|
|
|
|
|
|
|
|
// state store for pod tasks
|
|
|
|
type Registry interface {
|
|
|
|
// register the specified task with this registry, as long as the current error
|
|
|
|
// condition is nil. if no errors occur then return a copy of the registered task.
|
|
|
|
Register(*T, error) (*T, error)
|
|
|
|
|
|
|
|
// unregister the specified task from this registry
|
|
|
|
Unregister(*T)
|
|
|
|
|
|
|
|
// update state for the registered task identified by task.ID, returning a copy of
|
|
|
|
// the updated task, if any.
|
|
|
|
Update(task *T) error
|
|
|
|
|
|
|
|
// return the task registered for the specified task ID and its current state.
|
|
|
|
// if there is no such task then StateUnknown is returned.
|
|
|
|
Get(taskId string) (task *T, currentState StateType)
|
|
|
|
|
|
|
|
// return the non-terminal task corresponding to the specified pod ID
|
|
|
|
ForPod(podID string) (task *T, currentState StateType)
|
|
|
|
|
|
|
|
// update the task status given the specified mesos task status update, returning a
|
|
|
|
// copy of the updated task (if any) and its state.
|
|
|
|
UpdateStatus(status *mesos.TaskStatus) (*T, StateType)
|
|
|
|
|
|
|
|
// return a list of task ID's that match the given filter, or all task ID's if filter == nil.
|
|
|
|
List(filter func(*T) bool) []*T
|
|
|
|
}
|
|
|
|
|
|
|
|
type inMemoryRegistry struct {
|
|
|
|
rw sync.RWMutex
|
|
|
|
taskRegistry map[string]*T
|
|
|
|
tasksFinished *ring.Ring
|
|
|
|
podToTask map[string]string
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewInMemoryRegistry() Registry {
|
|
|
|
return &inMemoryRegistry{
|
|
|
|
taskRegistry: make(map[string]*T),
|
|
|
|
tasksFinished: ring.New(defaultFinishedTasksSize),
|
|
|
|
podToTask: make(map[string]string),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) List(accepts func(t *T) bool) (tasks []*T) {
|
|
|
|
k.rw.RLock()
|
|
|
|
defer k.rw.RUnlock()
|
|
|
|
for _, task := range k.taskRegistry {
|
|
|
|
if accepts == nil || accepts(task) {
|
|
|
|
tasks = append(tasks, task.Clone())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) ForPod(podID string) (task *T, currentState StateType) {
|
|
|
|
k.rw.RLock()
|
|
|
|
defer k.rw.RUnlock()
|
|
|
|
tid, ok := k.podToTask[podID]
|
|
|
|
if !ok {
|
|
|
|
return nil, StateUnknown
|
|
|
|
}
|
|
|
|
t, state := k._get(tid)
|
|
|
|
return t.Clone(), state
|
|
|
|
}
|
|
|
|
|
|
|
|
// registers a pod task unless the spec'd error is not nil
|
|
|
|
func (k *inMemoryRegistry) Register(task *T, err error) (*T, error) {
|
|
|
|
if err == nil {
|
|
|
|
k.rw.Lock()
|
|
|
|
defer k.rw.Unlock()
|
|
|
|
if _, found := k.podToTask[task.podKey]; found {
|
|
|
|
return nil, fmt.Errorf("task already registered for pod key %q", task.podKey)
|
|
|
|
}
|
|
|
|
if _, found := k.taskRegistry[task.ID]; found {
|
|
|
|
return nil, fmt.Errorf("task already registered for id %q", task.ID)
|
|
|
|
}
|
|
|
|
k.podToTask[task.podKey] = task.ID
|
|
|
|
k.taskRegistry[task.ID] = task
|
|
|
|
}
|
|
|
|
return task.Clone(), err
|
|
|
|
}
|
|
|
|
|
|
|
|
// updates internal task state. updates are limited to Spec, Flags, and Offer for
|
|
|
|
// StatePending tasks, and are limited to Flag updates (additive only) for StateRunning tasks.
|
|
|
|
func (k *inMemoryRegistry) Update(task *T) error {
|
|
|
|
if task == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
k.rw.Lock()
|
|
|
|
defer k.rw.Unlock()
|
|
|
|
switch internal, state := k._get(task.ID); state {
|
|
|
|
case StateUnknown:
|
|
|
|
return fmt.Errorf("no such task: %v", task.ID)
|
|
|
|
case StatePending:
|
|
|
|
internal.Offer = task.Offer
|
|
|
|
internal.Spec = task.Spec
|
|
|
|
(&task.Spec).copyTo(&internal.Spec)
|
|
|
|
internal.Flags = map[FlagType]struct{}{}
|
|
|
|
fallthrough
|
|
|
|
case StateRunning:
|
|
|
|
for k, v := range task.Flags {
|
|
|
|
internal.Flags[k] = v
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("may not update task %v in state %v", task.ID, state)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) Unregister(task *T) {
|
|
|
|
k.rw.Lock()
|
|
|
|
defer k.rw.Unlock()
|
|
|
|
delete(k.podToTask, task.podKey)
|
|
|
|
delete(k.taskRegistry, task.ID)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) Get(taskId string) (*T, StateType) {
|
|
|
|
k.rw.RLock()
|
|
|
|
defer k.rw.RUnlock()
|
|
|
|
t, state := k._get(taskId)
|
|
|
|
return t.Clone(), state
|
|
|
|
}
|
|
|
|
|
|
|
|
// assume that the caller has already locked around access to task state.
|
|
|
|
// the caller is also responsible for cloning the task object before it leaves
|
|
|
|
// the context of this registry.
|
|
|
|
func (k *inMemoryRegistry) _get(taskId string) (*T, StateType) {
|
|
|
|
if task, found := k.taskRegistry[taskId]; found {
|
|
|
|
return task, task.State
|
|
|
|
}
|
|
|
|
return nil, StateUnknown
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) {
|
|
|
|
taskId := status.GetTaskId().GetValue()
|
|
|
|
|
|
|
|
k.rw.Lock()
|
|
|
|
defer k.rw.Unlock()
|
|
|
|
task, state := k._get(taskId)
|
|
|
|
|
|
|
|
switch status.GetState() {
|
|
|
|
case mesos.TaskState_TASK_STAGING:
|
|
|
|
k.handleTaskStaging(task, state, status)
|
|
|
|
case mesos.TaskState_TASK_STARTING:
|
|
|
|
k.handleTaskStarting(task, state, status)
|
|
|
|
case mesos.TaskState_TASK_RUNNING:
|
|
|
|
k.handleTaskRunning(task, state, status)
|
|
|
|
case mesos.TaskState_TASK_FINISHED:
|
|
|
|
k.handleTaskFinished(task, state, status)
|
|
|
|
case mesos.TaskState_TASK_FAILED:
|
|
|
|
k.handleTaskFailed(task, state, status)
|
2015-08-18 08:31:50 +00:00
|
|
|
case mesos.TaskState_TASK_ERROR:
|
|
|
|
k.handleTaskError(task, state, status)
|
2015-06-10 20:58:22 +00:00
|
|
|
case mesos.TaskState_TASK_KILLED:
|
|
|
|
k.handleTaskKilled(task, state, status)
|
|
|
|
case mesos.TaskState_TASK_LOST:
|
|
|
|
k.handleTaskLost(task, state, status)
|
|
|
|
default:
|
|
|
|
log.Warningf("unhandled status update for task: %v", taskId)
|
|
|
|
}
|
|
|
|
return task.Clone(), state
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER {
|
|
|
|
log.Errorf("received STAGING for task %v with unexpected source: %v",
|
|
|
|
status.GetTaskId().GetValue(), status.GetSource())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
// we expect to receive this when a launched task is finally "bound"
|
|
|
|
// via the API server. however, there's nothing specific for us to do here.
|
|
|
|
switch state {
|
|
|
|
case StatePending:
|
|
|
|
task.UpdatedTime = time.Now()
|
|
|
|
if !task.Has(Bound) {
|
|
|
|
task.Set(Bound)
|
|
|
|
task.bindTime = task.UpdatedTime
|
|
|
|
timeToBind := task.bindTime.Sub(task.launchTime)
|
|
|
|
metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
taskId := status.GetTaskId().GetValue()
|
|
|
|
log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
taskId := status.GetTaskId().GetValue()
|
|
|
|
switch state {
|
|
|
|
case StatePending:
|
|
|
|
task.UpdatedTime = time.Now()
|
|
|
|
log.Infof("Received running status for pending task: %v", taskId)
|
|
|
|
fillRunningPodInfo(task, status)
|
|
|
|
task.State = StateRunning
|
|
|
|
case StateRunning:
|
|
|
|
task.UpdatedTime = time.Now()
|
|
|
|
log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
|
|
|
|
case StateFinished:
|
|
|
|
log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
|
|
|
|
default:
|
|
|
|
log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func ParsePodStatusResult(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
|
|
|
|
if taskStatus.Data != nil {
|
|
|
|
err = json.Unmarshal(taskStatus.Data, &result)
|
|
|
|
} else {
|
|
|
|
err = fmt.Errorf("missing TaskStatus.Data")
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
|
|
|
|
if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
|
|
|
|
// there is no data..
|
|
|
|
return
|
|
|
|
}
|
|
|
|
//TODO(jdef) determine the usefullness of this information (if any)
|
|
|
|
if result, err := ParsePodStatusResult(taskStatus); err != nil {
|
|
|
|
log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
|
|
|
|
} else {
|
|
|
|
task.podStatus = result.Status
|
|
|
|
log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
taskId := status.GetTaskId().GetValue()
|
|
|
|
switch state {
|
|
|
|
case StatePending:
|
|
|
|
panic(fmt.Sprintf("Pending task %v finished, this couldn't happen", taskId))
|
|
|
|
case StateRunning:
|
|
|
|
log.V(2).Infof("received finished status for running task: %v", taskId)
|
|
|
|
delete(k.podToTask, task.podKey)
|
|
|
|
task.State = StateFinished
|
|
|
|
task.UpdatedTime = time.Now()
|
|
|
|
k.tasksFinished = k.recordFinishedTask(task.ID)
|
|
|
|
case StateFinished:
|
|
|
|
log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
|
|
|
|
default:
|
|
|
|
log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// record that a task has finished.
|
|
|
|
// older record are expunged one at a time once the historical ring buffer is saturated.
|
|
|
|
// assumes caller is holding state lock.
|
|
|
|
func (k *inMemoryRegistry) recordFinishedTask(taskId string) *ring.Ring {
|
|
|
|
slot := k.tasksFinished.Next()
|
|
|
|
if slot.Value != nil {
|
|
|
|
// garbage collect older finished task from the registry
|
|
|
|
gctaskId := slot.Value.(string)
|
|
|
|
if gctask, found := k.taskRegistry[gctaskId]; found && gctask.State == StateFinished {
|
|
|
|
delete(k.taskRegistry, gctaskId)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
slot.Value = taskId
|
|
|
|
return slot
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskFailed(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
switch state {
|
2015-08-18 08:31:50 +00:00
|
|
|
case StatePending, StateRunning:
|
2015-06-10 20:58:22 +00:00
|
|
|
delete(k.taskRegistry, task.ID)
|
|
|
|
delete(k.podToTask, task.podKey)
|
2015-08-18 08:31:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskError(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
switch state {
|
|
|
|
case StatePending, StateRunning:
|
2015-06-10 20:58:22 +00:00
|
|
|
delete(k.taskRegistry, task.ID)
|
|
|
|
delete(k.podToTask, task.podKey)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskKilled(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
defer func() {
|
|
|
|
msg := fmt.Sprintf("task killed: %+v, task %+v", status, task)
|
|
|
|
if task != nil && task.Has(Deleted) {
|
|
|
|
// we were expecting this, nothing out of the ordinary
|
|
|
|
log.V(2).Infoln(msg)
|
|
|
|
} else {
|
|
|
|
log.Errorln(msg)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
switch state {
|
|
|
|
case StatePending, StateRunning:
|
|
|
|
delete(k.taskRegistry, task.ID)
|
|
|
|
delete(k.podToTask, task.podKey)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (k *inMemoryRegistry) handleTaskLost(task *T, state StateType, status *mesos.TaskStatus) {
|
|
|
|
switch state {
|
|
|
|
case StateRunning, StatePending:
|
|
|
|
delete(k.taskRegistry, task.ID)
|
|
|
|
delete(k.podToTask, task.podKey)
|
|
|
|
}
|
|
|
|
}
|