Spark: Add liveness probes for master/worker

It turns out the pods don't exit when the master or worker process crashes.

Along the way: remove redundant metadata.
pull/6/head
Zach Loafman 2015-10-29 05:22:56 -07:00
parent 413ec6c83a
commit d24d2a21f9
4 changed files with 18 additions and 9 deletions
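
For context (not part of this change): spark-daemon.sh records the daemon's PID in a file when it launches the given class, and its status action exits non-zero once that process is gone, so a failing exec probe lets the kubelet restart the container even though the pod itself never exits. A rough sketch of that check, with an illustrative PID-file path (an approximation, not the actual script):

    # approximate behavior of: spark-daemon.sh status org.apache.spark.deploy.master.Master 1
    pid_file=/tmp/spark--org.apache.spark.deploy.master.Master-1.pid   # illustrative path
    if [ -f "$pid_file" ] && ps -p "$(cat "$pid_file")" > /dev/null 2>&1; then
      exit 0   # process with the recorded PID is alive; probe passes
    else
      exit 1   # PID file missing or process dead; probe fails, container is restarted
    fi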

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-driver-controller
-  labels:
-    component: spark-driver
 spec:
   replicas: 1
   selector:

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-master-controller
-  labels:
-    component: spark-master
 spec:
   replicas: 1
   selector:
@@ -19,6 +17,15 @@ spec:
           ports:
             - containerPort: 7077
             - containerPort: 8080
+          livenessProbe:
+            exec:
+              command:
+                - /opt/spark/sbin/spark-daemon.sh
+                - status
+                - org.apache.spark.deploy.master.Master
+                - '1'
+            initialDelaySeconds: 30
+            timeoutSeconds: 1
           resources:
             requests:
               cpu: 100m

View File

@@ -2,8 +2,6 @@ kind: Service
 apiVersion: v1
 metadata:
   name: spark-master
-  labels:
-    component: spark-master-service
 spec:
   ports:
     - port: 7077

View File

@@ -2,8 +2,6 @@ kind: ReplicationController
 apiVersion: v1
 metadata:
   name: spark-worker-controller
-  labels:
-    component: spark-worker
 spec:
   replicas: 3
   selector:
@@ -12,13 +10,21 @@ spec:
     metadata:
       labels:
         component: spark-worker
-        uses: spark-master
     spec:
       containers:
         - name: spark-worker
           image: gcr.io/google_containers/spark-worker:1.5.1_v1
           ports:
             - containerPort: 8888
+          livenessProbe:
+            exec:
+              command:
+                - /opt/spark/sbin/spark-daemon.sh
+                - status
+                - org.apache.spark.deploy.worker.Worker
+                - '1'
+            initialDelaySeconds: 30
+            timeoutSeconds: 1
           resources:
             requests:
               cpu: 100m
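
A quick way to exercise the probes, assuming kubectl access to the cluster (illustrative commands, not part of this commit): kill the worker JVM inside one of the pods and watch the kubelet restart the container.

    kubectl get pods -l component=spark-worker   # RESTARTS count goes up once the probe fails
    kubectl describe pod <spark-worker-pod>      # pod events record the liveness probe failure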