Merge pull request #11047 from gurvindersingh/master

modified spark example to use kubectl exec to interact with cluster a…
Brendan Burns 2015-07-30 10:01:10 -07:00
commit 1d9a0623f2
7 changed files with 118 additions and 33 deletions

examples/examples_test.go

@ -317,6 +317,7 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"spark-master-service":    &api.Service{},
 			"spark-master":            &api.Pod{},
 			"spark-worker-controller": &api.ReplicationController{},
+			"spark-driver":            &api.Pod{},
 		},
 		"../examples/storm": {
 			"storm-nimbus-service": &api.Service{},

examples/spark/README.md

@ -144,45 +144,36 @@ $ kubectl logs spark-master
 15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
 ```

-## Step Three: Do something with the cluster
+## Step Three: Start your Spark driver to launch jobs on your Spark cluster

-Get the address and port of the Master service.
+The Spark driver is used to launch jobs into the Spark cluster. You can read more about it in
+[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).

-```sh
-$ kubectl get service spark-master
-NAME           LABELS              SELECTOR            IP(S)          PORT(S)
-spark-master   name=spark-master   name=spark-master   10.0.204.187   7077/TCP
-```
+```shell
+$ kubectl create -f examples/spark/spark-driver.json
+```

-SSH to one of your cluster nodes. On GCE/GKE you can either use [Developers Console](https://console.developers.google.com)
-(more details [here](https://cloud.google.com/compute/docs/ssh-in-browser))
-or run `gcloud compute ssh <name>` where the name can be taken from `kubectl get nodes`
-(more details [here](https://cloud.google.com/compute/docs/gcloud-compute/#connecting)).
-
-```
-$ kubectl get nodes
-NAME                     LABELS                                           STATUS
-kubernetes-minion-5jvu   kubernetes.io/hostname=kubernetes-minion-5jvu    Ready
-kubernetes-minion-6fbi   kubernetes.io/hostname=kubernetes-minion-6fbi    Ready
-kubernetes-minion-8y2v   kubernetes.io/hostname=kubernetes-minion-8y2v    Ready
-kubernetes-minion-h0tr   kubernetes.io/hostname=kubernetes-minion-h0tr    Ready
-
-$ gcloud compute ssh kubernetes-minion-5jvu --zone=us-central1-b
-Linux kubernetes-minion-5jvu 3.16.0-0.bpo.4-amd64 #1 SMP Debian 3.16.7-ckt9-3~deb8u1~bpo70+1 (2015-04-27) x86_64
-=== GCE Kubernetes node setup complete ===
-me@kubernetes-minion-5jvu:~$
-```
+The Spark driver needs the Master service to be running.

+### Check to see if the driver is running
+
+```shell
+$ kubectl get pods
+NAME                            READY     REASON    RESTARTS   AGE
+[...]
+spark-master                    1/1       Running   0          14m
+spark-driver                    1/1       Running   0          10m
+```

-Once logged in run spark-base image. Inside of the image there is a script
-that sets up the environment based on the provided IP and port of the Master.
+## Step Four: Do something with the cluster
+
+Use kubectl exec to connect to the Spark driver.

 ```
-cluster-node $ sudo docker run -it gcr.io/google_containers/spark-base
-root@f12a6fec45ce:/# . /setup_client.sh 10.0.204.187 7077
-root@f12a6fec45ce:/# pyspark
+$ kubectl exec spark-driver -it bash
+root@spark-driver:/#
+root@spark-driver:/# pyspark
 Python 2.7.9 (default, Mar 1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
 15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
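Beyond the interactive pyspark session above, the same exec path can drive a batch submission. A minimal sketch, assuming the driver image unpacks Spark under /opt/spark (the conf directory start.sh writes to) and includes the stock Spark examples:

```shell
# Submit the bundled SparkPi example through the driver pod; MASTER is
# written into spark-env.sh by start.sh, so we source it and pass it on.
# The `--` separator keeps kubectl from swallowing the inner flags.
$ kubectl exec spark-driver -it -- bash -c \
    '. /opt/spark/conf/spark-env.sh && \
     /opt/spark/bin/spark-submit --master "$MASTER" \
       /opt/spark/examples/src/main/python/pi.py 10'
```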
@ -201,9 +192,9 @@ SparkContext available as sc, HiveContext available as sqlContext.

 ## Result

-You now have services, replication controllers, and pods for the Spark master and Spark workers.
+You now have services, replication controllers, and pods for the Spark master, Spark driver and Spark workers.
 You can take this example to the next step and start using the Apache Spark cluster
 you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
 for more information.

 ## tl;dr

@ -216,6 +207,8 @@ Make sure the Master Pod is running (use: ```kubectl get pods```).

 ```kubectl create -f spark-worker-controller.json```

+```kubectl create -f spark-driver.json```
+
 <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
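With the driver added, the tl;dr is now four `kubectl create` calls; a compact way to run them in dependency order from the repository root (a sketch, not part of the example itself):

```shell
# Create the service and master first, then the workers and the driver.
$ for f in spark-master-service.json spark-master.json \
           spark-worker-controller.json spark-driver.json; do
    kubectl create -f "examples/spark/$f"
  done
```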

examples/spark/images/driver/Dockerfile

@ -0,0 +1,4 @@
FROM gcr.io/google_containers/spark-base
ADD start.sh /start.sh
ADD log4j.properties /opt/spark/conf/log4j.properties
CMD ["/start.sh"]
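To rebuild and publish this image, the standard Docker workflow applies. The tag below mirrors the image name referenced by spark-driver.json; substitute your own registry/repository when pushing:

```shell
# Build from the driver image directory (run from the repo root), then push.
$ docker build -t gurvin/spark-driver examples/spark/images/driver/
$ docker push gurvin/spark-driver
```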

examples/spark/images/driver/README.md

@ -0,0 +1,37 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
<strong>
The latest 1.0.x release of this document can be found
[here](http://releases.k8s.io/release-1.0/examples/spark/images/driver/README.md).
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/images/driver/README.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

examples/spark/images/driver/start.sh

@ -0,0 +1,23 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
while true; do
sleep 100
done
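The script leans entirely on the environment variables Kubernetes injects for the spark-master service (SPARK_MASTER_SERVICE_HOST and SPARK_MASTER_SERVICE_PORT), then parks the container so kubectl exec always has a live process to attach to. One way to confirm the wiring once the pod is up (the IP shown is illustrative):

```shell
$ kubectl exec spark-driver -- cat /opt/spark/conf/spark-env.sh
SPARK_LOCAL_HOSTNAME=10.244.1.12
MASTER=spark://spark-master:7077
```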

examples/spark/spark-driver.json

@ -0,0 +1,23 @@
{
  "kind": "Pod",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-driver",
    "labels": {
      "name": "spark-driver"
    }
  },
  "spec": {
    "containers": [
      {
        "name": "spark-driver",
        "image": "gurvin/spark-driver",
        "resources": {
          "limits": {
            "cpu": "100m"
          }
        }
      }
    ]
  }
}
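The pod sets only a 100-millicore CPU limit, so it should schedule even on a small cluster. A quick sanity pass after creation (the column layout follows the kubectl output shown earlier in this PR and varies by version):

```shell
$ kubectl create -f examples/spark/spark-driver.json
$ kubectl get pod spark-driver
NAME           READY     REASON    RESTARTS   AGE
spark-driver   1/1       Running   0          1m
```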

test/e2e/examples.go

@ -170,22 +170,26 @@ var _ = Describe("Examples e2e", func() {
 	})

 	Describe("[Skipped][Example]Spark", func() {
-		It("should start spark master and workers", func() {
+		It("should start spark master, driver and workers", func() {
 			mkpath := func(file string) string {
 				return filepath.Join(testContext.RepoRoot, "examples", "spark", file)
 			}
 			serviceJson := mkpath("spark-master-service.json")
 			masterJson := mkpath("spark-master.json")
+			driverJson := mkpath("spark-driver.json")
 			workerControllerJson := mkpath("spark-worker-controller.json")
 			nsFlag := fmt.Sprintf("--namespace=%v", ns)

 			By("starting master")
 			runKubectl("create", "-f", serviceJson, nsFlag)
 			runKubectl("create", "-f", masterJson, nsFlag)
+			runKubectl("create", "-f", driverJson, nsFlag)
 			err := waitForPodRunningInNamespace(c, "spark-master", ns)
 			Expect(err).NotTo(HaveOccurred())
 			_, err = lookForStringInLog(ns, "spark-master", "spark-master", "Starting Spark master at", serverStartTimeout)
 			Expect(err).NotTo(HaveOccurred())
+			_, err = lookForStringInLog(ns, "spark-driver", "spark-driver", "Starting Spark driver at", serverStartTimeout)
+			Expect(err).NotTo(HaveOccurred())

 			By("starting workers")
 			runKubectl("create", "-f", workerControllerJson, nsFlag)
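To run just this spec against a live cluster, the Ginkgo focus route should work; a sketch assuming the hack/e2e.go runner of this era (flag spellings differ between releases, and the [Skipped] tag means the spec is excluded from default runs):

```shell
# Run only the Spark example e2e test against an existing cluster.
$ go run hack/e2e.go -v --test \
    --test_args="--ginkgo.focus=Spark"
```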