Merge pull request #11047 from gurvindersingh/master

modified spark example to use kubectl exec to interact with cluster a…
Brendan Burns 2015-07-30 10:01:10 -07:00
commit 1d9a0623f2
7 changed files with 118 additions and 33 deletions

examples/examples_test.go

@ -317,6 +317,7 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"spark-master-service":    &api.Service{},
 			"spark-master":            &api.Pod{},
 			"spark-worker-controller": &api.ReplicationController{},
+			"spark-driver":            &api.Pod{},
 		},
 		"../examples/storm": {
 			"storm-nimbus-service": &api.Service{},

examples/spark/README.md

@ -144,45 +144,36 @@ $ kubectl logs spark-master
 15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
 ```

-## Step Three: Do something with the cluster
+## Step Three: Start your Spark driver to launch jobs on your Spark cluster

-Get the address and port of the Master service.
+The Spark driver is used to launch jobs into the Spark cluster. You can read more about it in
+[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).

-```sh
-$ kubectl get service spark-master
-NAME           LABELS              SELECTOR            IP(S)          PORT(S)
-spark-master   name=spark-master   name=spark-master   10.0.204.187   7077/TCP
-```
+```shell
+$ kubectl create -f examples/spark/spark-driver.json
+```

-SSH to one of your cluster nodes. On GCE/GKE you can either use [Developers Console](https://console.developers.google.com)
-(more details [here](https://cloud.google.com/compute/docs/ssh-in-browser))
-or run `gcloud compute ssh <name>` where the name can be taken from `kubectl get nodes`
-(more details [here](https://cloud.google.com/compute/docs/gcloud-compute/#connecting)).
-
-```
-$ kubectl get nodes
-NAME                     LABELS                                           STATUS
-kubernetes-minion-5jvu   kubernetes.io/hostname=kubernetes-minion-5jvu    Ready
-kubernetes-minion-6fbi   kubernetes.io/hostname=kubernetes-minion-6fbi    Ready
-kubernetes-minion-8y2v   kubernetes.io/hostname=kubernetes-minion-8y2v    Ready
-kubernetes-minion-h0tr   kubernetes.io/hostname=kubernetes-minion-h0tr    Ready
-
-$ gcloud compute ssh kubernetes-minion-5jvu --zone=us-central1-b
-Linux kubernetes-minion-5jvu 3.16.0-0.bpo.4-amd64 #1 SMP Debian 3.16.7-ckt9-3~deb8u1~bpo70+1 (2015-04-27) x86_64
-=== GCE Kubernetes node setup complete ===
-me@kubernetes-minion-5jvu:~$
-```
+The Spark driver needs the Master service to be running.

+### Check to see if the driver is running
+
+```shell
+$ kubectl get pods
+NAME                            READY     REASON    RESTARTS   AGE
+[...]
+spark-master                    1/1       Running   0          14m
+spark-driver                    1/1       Running   0          10m
+```

-Once logged in run spark-base image. Inside of the image there is a script
-that sets up the environment based on the provided IP and port of the Master.
+## Step Four: Do something with the cluster
+
+Use kubectl exec to connect to the Spark driver.

 ```
-cluster-node $ sudo docker run -it gcr.io/google_containers/spark-base
-root@f12a6fec45ce:/# . /setup_client.sh 10.0.204.187 7077
-root@f12a6fec45ce:/# pyspark
+$ kubectl exec spark-driver -it bash
+root@spark-driver:/#
+root@spark-driver:/# pyspark
 Python 2.7.9 (default, Mar 1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
 15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
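Beyond the interactive pyspark session above, the same exec path can drive a batch submission. A minimal sketch, assuming the driver image unpacks Spark under /opt/spark (the conf directory start.sh writes to) and includes the stock Spark examples:

```shell
# Submit the bundled SparkPi example through the driver pod; MASTER is
# written into spark-env.sh by start.sh, so we source it and pass it on.
# The `--` separator keeps kubectl from swallowing the inner flags.
$ kubectl exec spark-driver -it -- bash -c \
    '. /opt/spark/conf/spark-env.sh && \
     /opt/spark/bin/spark-submit --master "$MASTER" \
       /opt/spark/examples/src/main/python/pi.py 10'
```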
@ -201,9 +192,9 @@ SparkContext available as sc, HiveContext available as sqlContext.

 ## Result

-You now have services, replication controllers, and pods for the Spark master and Spark workers.
+You now have services, replication controllers, and pods for the Spark master, Spark driver and Spark workers.
 You can take this example to the next step and start using the Apache Spark cluster
 you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
 for more information.

 ## tl;dr

@ -216,6 +207,8 @@ Make sure the Master Pod is running (use: ```kubectl get pods```).

 ```kubectl create -f spark-worker-controller.json```

+```kubectl create -f spark-driver.json```
+
 <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
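With the driver added, the tl;dr is now four `kubectl create` calls; a compact way to run them in dependency order from the repository root (a sketch, not part of the example itself):

```shell
# Create the service and master first, then the workers and the driver.
$ for f in spark-master-service.json spark-master.json \
           spark-worker-controller.json spark-driver.json; do
    kubectl create -f "examples/spark/$f"
  done
```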

examples/spark/images/driver/Dockerfile

@ -0,0 +1,4 @@
FROM gcr.io/google_containers/spark-base
ADD start.sh /start.sh
ADD log4j.properties /opt/spark/conf/log4j.properties
CMD ["/start.sh"]
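To rebuild and publish this image, the standard Docker workflow applies. The tag below mirrors the image name referenced by spark-driver.json; substitute your own registry/repository when pushing:

```shell
# Build from the driver image directory (run from the repo root), then push.
$ docker build -t gurvin/spark-driver examples/spark/images/driver/
$ docker push gurvin/spark-driver
```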

examples/spark/images/driver/README.md

@ -0,0 +1,37 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
<strong>
The latest 1.0.x release of this document can be found
[here](http://releases.k8s.io/release-1.0/examples/spark/images/driver/README.md).
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/images/driver/README.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

examples/spark/images/driver/start.sh

@ -0,0 +1,23 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
while true; do
sleep 100
done
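The script leans entirely on the environment variables Kubernetes injects for the spark-master service (SPARK_MASTER_SERVICE_HOST and SPARK_MASTER_SERVICE_PORT), then parks the container so kubectl exec always has a live process to attach to. One way to confirm the wiring once the pod is up (the IP shown is illustrative):

```shell
$ kubectl exec spark-driver -- cat /opt/spark/conf/spark-env.sh
SPARK_LOCAL_HOSTNAME=10.244.1.12
MASTER=spark://spark-master:7077
```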

examples/spark/spark-driver.json

@ -0,0 +1,23 @@
{
  "kind": "Pod",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-driver",
    "labels": {
      "name": "spark-driver"
    }
  },
  "spec": {
    "containers": [
      {
        "name": "spark-driver",
        "image": "gurvin/spark-driver",
        "resources": {
          "limits": {
            "cpu": "100m"
          }
        }
      }
    ]
  }
}
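The pod sets only a 100-millicore CPU limit, so it should schedule even on a small cluster. A quick sanity pass after creation (the column layout follows the kubectl output shown earlier in this PR and varies by version):

```shell
$ kubectl create -f examples/spark/spark-driver.json
$ kubectl get pod spark-driver
NAME           READY     REASON    RESTARTS   AGE
spark-driver   1/1       Running   0          1m
```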

test/e2e/examples.go

@ -170,22 +170,26 @@ var _ = Describe("Examples e2e", func() {
 	})

 	Describe("[Skipped][Example]Spark", func() {
-		It("should start spark master and workers", func() {
+		It("should start spark master, driver and workers", func() {
 			mkpath := func(file string) string {
 				return filepath.Join(testContext.RepoRoot, "examples", "spark", file)
 			}
 			serviceJson := mkpath("spark-master-service.json")
 			masterJson := mkpath("spark-master.json")
+			driverJson := mkpath("spark-driver.json")
 			workerControllerJson := mkpath("spark-worker-controller.json")
 			nsFlag := fmt.Sprintf("--namespace=%v", ns)

 			By("starting master")
 			runKubectl("create", "-f", serviceJson, nsFlag)
 			runKubectl("create", "-f", masterJson, nsFlag)
+			runKubectl("create", "-f", driverJson, nsFlag)
 			err := waitForPodRunningInNamespace(c, "spark-master", ns)
 			Expect(err).NotTo(HaveOccurred())
 			_, err = lookForStringInLog(ns, "spark-master", "spark-master", "Starting Spark master at", serverStartTimeout)
 			Expect(err).NotTo(HaveOccurred())
+			_, err = lookForStringInLog(ns, "spark-driver", "spark-driver", "Starting Spark driver at", serverStartTimeout)
+			Expect(err).NotTo(HaveOccurred())

 			By("starting workers")
 			runKubectl("create", "-f", workerControllerJson, nsFlag)
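To run just this spec against a live cluster, the Ginkgo focus route should work; a sketch assuming the hack/e2e.go runner of this era (flag spellings differ between releases, and the [Skipped] tag means the spec is excluded from default runs):

```shell
# Run only the Spark example e2e test against an existing cluster.
$ go run hack/e2e.go -v --test \
    --test_args="--ginkgo.focus=Spark"
```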