From a046fa478d33d17657bb3a240c714610c3d80ae4 Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Fri, 10 Jul 2015 11:19:55 +0200
Subject: [PATCH 1/6] modified spark example to use kubectl exec to interact
 with cluster and create spark driver pod

---
 examples/spark/README.md                | 57 +++++++++++--------------
 examples/spark/images/driver/Dockerfile |  4 ++
 examples/spark/images/driver/README.md  |  0
 examples/spark/images/driver/start.sh   |  9 ++++
 examples/spark/spark-driver.json        | 23 ++++++++++
 5 files changed, 61 insertions(+), 32 deletions(-)
 create mode 100644 examples/spark/images/driver/Dockerfile
 create mode 100644 examples/spark/images/driver/README.md
 create mode 100755 examples/spark/images/driver/start.sh
 create mode 100644 examples/spark/spark-driver.json

diff --git a/examples/spark/README.md b/examples/spark/README.md
index c401311269..d2ac4575cd 100644
--- a/examples/spark/README.md
+++ b/examples/spark/README.md
@@ -110,44 +110,35 @@ $ kubectl logs spark-master
 15/06/26 14:15:55 INFO Master: Registering worker 10.244.1.15:44839 with 1 cores, 2.6 GB RAM
 15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
 ```
-## Step Three: Do something with the cluster
 
-Get the address and port of the Master service.
+## Step Three: Start your Spark driver to launch jobs on your Spark cluster
+
+The Spark driver is used to launch jobs into the Spark cluster. You can read more about it in
+[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).
 
 ```shell
-$ kubectl get service spark-master
-NAME           LABELS              SELECTOR            IP(S)          PORT(S)
-spark-master   name=spark-master   name=spark-master   10.0.204.187   7077/TCP
+$ kubectl create -f examples/spark/spark-driver.json
+```
+The Spark driver needs the Master service to be running.
+
+### Check to see if the driver is running
+
+```shell
+$ kubectl get pods
+NAME           READY     REASON    RESTARTS   AGE
+[...]
+spark-master   1/1       Running   0          14m
+spark-driver   1/1       Running   0          10m
 ```
 
-SSH to one of your cluster nodes. On GCE/GKE you can either use [Developers Console](https://console.developers.google.com)
-(more details [here](https://cloud.google.com/compute/docs/ssh-in-browser))
-or run `gcloud compute ssh <node>` where the name can be taken from `kubectl get nodes`
-(more details [here](https://cloud.google.com/compute/docs/gcloud-compute/#connecting)).
+## Step Four: Do something with the cluster
+
+Use `kubectl exec` to connect to the Spark driver:
 
 ```
-$ kubectl get nodes
-NAME                     LABELS                                          STATUS
-kubernetes-minion-5jvu   kubernetes.io/hostname=kubernetes-minion-5jvu   Ready
-kubernetes-minion-6fbi   kubernetes.io/hostname=kubernetes-minion-6fbi   Ready
-kubernetes-minion-8y2v   kubernetes.io/hostname=kubernetes-minion-8y2v   Ready
-kubernetes-minion-h0tr   kubernetes.io/hostname=kubernetes-minion-h0tr   Ready
-
-$ gcloud compute ssh kubernetes-minion-5jvu --zone=us-central1-b
-Linux kubernetes-minion-5jvu 3.16.0-0.bpo.4-amd64 #1 SMP Debian 3.16.7-ckt9-3~deb8u1~bpo70+1 (2015-04-27) x86_64
-
-=== GCE Kubernetes node setup complete ===
-
-me@kubernetes-minion-5jvu:~$
-```
-
-Once logged in run spark-base image. Inside of the image there is a script
-that sets up the environment based on the provided IP and port of the Master.
-
-```
-cluster-node $ sudo docker run -it gcr.io/google_containers/spark-base
-root@f12a6fec45ce:/# . /setup_client.sh 10.0.204.187 7077
-root@f12a6fec45ce:/# pyspark
+$ kubectl exec spark-driver -it bash
+root@spark-driver:/#
+root@spark-driver:/# pyspark
 Python 2.7.9 (default, Mar  1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
@@ -166,7 +157,7 @@ SparkContext available as sc, HiveContext available as sqlContext.
 ```
 
 ## Result
-You now have services, replication controllers, and pods for the Spark master and Spark workers.
+You now have services, replication controllers, and pods for the Spark master, Spark driver, and Spark workers.
 
 You can take this example to the next step and start using the Apache Spark cluster
 you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
 for more information.
@@ -181,4 +172,6 @@ Make sure the Master Pod is running (use: ```kubectl get pods```).
 
 ```kubectl create -f spark-worker-controller.json```
 
+```kubectl create -f spark-driver.json```
+
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
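A quick way to exercise the new "Step Four" section is to type a small job into the `pyspark` shell started above. This is an illustrative sketch, not part of the patch: `sc` is the SparkContext the shell creates for you, and the output assumes the toy input shown.

```shell
root@spark-driver:/# pyspark
>>> # Double every number in 0..999, keep the multiples of four, and count them on the workers.
>>> sc.parallelize(range(1000)).map(lambda x: x * 2).filter(lambda x: x % 4 == 0).count()
500
```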
diff --git a/examples/spark/images/driver/Dockerfile b/examples/spark/images/driver/Dockerfile
new file mode 100644
index 0000000000..cfb1dad7df
--- /dev/null
+++ b/examples/spark/images/driver/Dockerfile
@@ -0,0 +1,4 @@
+FROM gcr.io/google_containers/spark-base
+ADD start.sh /start.sh
+ADD log4j.properties /opt/spark/conf/log4j.properties
+CMD ["/start.sh"]
diff --git a/examples/spark/images/driver/README.md b/examples/spark/images/driver/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/spark/images/driver/start.sh b/examples/spark/images/driver/start.sh
new file mode 100755
index 0000000000..495194dc38
--- /dev/null
+++ b/examples/spark/images/driver/start.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
+echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
+echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
+
+while true; do
+sleep 100
+done
diff --git a/examples/spark/spark-driver.json b/examples/spark/spark-driver.json
new file mode 100644
index 0000000000..ee695eeabc
--- /dev/null
+++ b/examples/spark/spark-driver.json
@@ -0,0 +1,23 @@
+{
+  "kind": "Pod",
+  "apiVersion": "v1",
+  "metadata": {
+    "name": "spark-driver",
+    "labels": {
+      "name": "spark-driver"
+    }
+  },
+  "spec": {
+    "containers": [
+      {
+        "name": "spark-driver",
+        "image": "gurvin/spark-driver",
+        "resources": {
+          "limits": {
+            "cpu": "100m"
+          }
+        }
+      }
+    ]
+  }
+}
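The wiring in start.sh above relies on the environment variables that Kubernetes injects into containers for every service that already exists (here `SPARK_MASTER_SERVICE_HOST` and `SPARK_MASTER_SERVICE_PORT` from the spark-master service); this is also why the driver pod has to be created after the Master service is up, as the README notes. Inside a running driver pod the generated configuration should look roughly like the following sketch; the addresses are illustrative and will differ on your cluster:

```shell
root@spark-driver:/# grep spark-master /etc/hosts
10.0.204.187 spark-master
root@spark-driver:/# tail -2 /opt/spark/conf/spark-env.sh   # the two lines start.sh appended
SPARK_LOCAL_HOSTNAME=10.244.2.12
MASTER=spark://spark-master:7077
```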
From 68f0db84cb60bc404b410820ea869b9d086f111d Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Sat, 25 Jul 2015 21:05:45 +0200
Subject: [PATCH 2/6] added indent for sleep

---
 examples/spark/images/driver/start.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/spark/images/driver/start.sh b/examples/spark/images/driver/start.sh
index 495194dc38..696c4dc644 100755
--- a/examples/spark/images/driver/start.sh
+++ b/examples/spark/images/driver/start.sh
@@ -5,5 +5,5 @@ echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
 echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
 
 while true; do
-sleep 100
+  sleep 100
 done

From 5f48898498b4b066569bac39833689285d4a6a47 Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Sat, 25 Jul 2015 21:05:45 +0200
Subject: [PATCH 3/6] added indent for sleep

---
 examples/spark/images/driver/start.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/spark/images/driver/start.sh b/examples/spark/images/driver/start.sh
index 495194dc38..696c4dc644 100755
--- a/examples/spark/images/driver/start.sh
+++ b/examples/spark/images/driver/start.sh
@@ -5,5 +5,5 @@ echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
 echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
 
 while true; do
-sleep 100
+  sleep 100
 done

From 5599d83685e046d5ea2079f63cd4380e8e7d3565 Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Mon, 27 Jul 2015 23:01:49 +0200
Subject: [PATCH 4/6] added standard copyright

---
 examples/spark/images/driver/start.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/examples/spark/images/driver/start.sh b/examples/spark/images/driver/start.sh
index 696c4dc644..13be069957 100755
--- a/examples/spark/images/driver/start.sh
+++ b/examples/spark/images/driver/start.sh
@@ -1,5 +1,19 @@
 #!/bin/bash
 
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
 echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
 echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh

From f64d89fd1d0c81eb1e3e6ea3a6b2ca6571a65c4b Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Wed, 29 Jul 2015 22:21:10 +0200
Subject: [PATCH 5/6] fixed documentation after running gendocs.sh

---
 examples/spark/README.md               |  2 ++
 examples/spark/images/driver/README.md | 37 ++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/examples/spark/README.md b/examples/spark/README.md
index b204fce0bd..cefc2762b3 100644
--- a/examples/spark/README.md
+++ b/examples/spark/README.md
@@ -152,6 +152,7 @@ The Spark driver is used to launch jobs into the Spark cluster. You can read more ab
 ```shell
 $ kubectl create -f examples/spark/spark-driver.json
 ```
+
 The Spark driver needs the Master service to be running.
 
 ### Check to see if the driver is running
@@ -208,6 +209,7 @@ Make sure the Master Pod is running (use: ```kubectl get pods```).
 
 ```kubectl create -f spark-driver.json```
 
+
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
diff --git a/examples/spark/images/driver/README.md b/examples/spark/images/driver/README.md
index e69de29bb2..2a36c4ee68 100644
--- a/examples/spark/images/driver/README.md
+++ b/examples/spark/images/driver/README.md
@@ -0,0 +1,37 @@
+
+WARNING
+WARNING
+WARNING
+WARNING
+WARNING
+
+PLEASE NOTE: This document applies to the HEAD of the source tree
+
+If you are using a released version of Kubernetes, you should
+refer to the docs that go with that version.
+
+The latest 1.0.x release of this document can be found
+[here](http://releases.k8s.io/release-1.0/examples/spark/images/driver/README.md).
+
+Documentation for other releases can be found at
+[releases.k8s.io](http://releases.k8s.io).
+
+--
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/images/driver/README.md?pixel)]()
+

From e1d76d5f19e84f87d4e7e51618a38ade9eda524c Mon Sep 17 00:00:00 2001
From: Gurvinder Singh
Date: Thu, 30 Jul 2015 09:47:56 +0200
Subject: [PATCH 6/6] added test for spark driver too

---
 examples/examples_test.go | 1 +
 test/e2e/examples.go      | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/examples_test.go b/examples/examples_test.go
index bdad9c9b06..d5e42f526a 100644
--- a/examples/examples_test.go
+++ b/examples/examples_test.go
@@ -317,6 +317,7 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"spark-master-service":    &api.Service{},
 			"spark-master":            &api.Pod{},
 			"spark-worker-controller": &api.ReplicationController{},
+			"spark-driver":            &api.Pod{},
 		},
 		"../examples/storm": {
 			"storm-nimbus-service": &api.Service{},
diff --git a/test/e2e/examples.go b/test/e2e/examples.go
index 75a8d6de6b..6c3d4ef309 100644
--- a/test/e2e/examples.go
+++ b/test/e2e/examples.go
@@ -159,22 +159,26 @@ var _ = Describe("Examples e2e", func() {
 	})
 
 	Describe("[Skipped][Example]Spark", func() {
-		It("should start spark master and workers", func() {
+		It("should start spark master, driver and workers", func() {
 			mkpath := func(file string) string {
 				return filepath.Join(testContext.RepoRoot, "examples", "spark", file)
 			}
 			serviceJson := mkpath("spark-master-service.json")
 			masterJson := mkpath("spark-master.json")
+			driverJson := mkpath("spark-driver.json")
 			workerControllerJson := mkpath("spark-worker-controller.json")
 			nsFlag := fmt.Sprintf("--namespace=%v", ns)
 			By("starting master")
 			runKubectl("create", "-f", serviceJson, nsFlag)
 			runKubectl("create", "-f", masterJson, nsFlag)
+			runKubectl("create", "-f", driverJson, nsFlag)
 			err := waitForPodRunningInNamespace(c, "spark-master", ns)
 			Expect(err).NotTo(HaveOccurred())
 			_, err = lookForStringInLog(ns, "spark-master", "spark-master", "Starting Spark master at", serverStartTimeout)
 			Expect(err).NotTo(HaveOccurred())
+			_, err = lookForStringInLog(ns, "spark-driver", "spark-driver", "Starting Spark driver at", serverStartTimeout)
+			Expect(err).NotTo(HaveOccurred())
 			By("starting workers")
 			runKubectl("create", "-f", workerControllerJson, nsFlag)
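Two reviewer notes on this test, both hedged. First, the new assertion greps the driver log for "Starting Spark driver at", while the start.sh added earlier in this series only appends configuration and sleeps, so it is worth confirming that the driver image actually emits that line before relying on it. Second, a sketch of running just this spec through the repository's e2e runner; the flags below are assumptions, so check hack/e2e.go on your checkout for the exact invocation:

```shell
$ go run hack/e2e.go -v -test --test_args="--ginkgo.focus=Spark"
```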