Merge pull request #16320 from zmerlynn/spark-rcs

Auto commit by PR queue bot
pull/6/head
k8s-merge-robot 2015-10-29 01:42:31 -07:00
commit 63c512cfed
25 changed files with 355 additions and 231 deletions

View File

@ -356,10 +356,11 @@ func TestExampleObjectSchemas(t *testing.T) {
"secret": &api.Secret{}, "secret": &api.Secret{},
}, },
"../examples/spark": { "../examples/spark": {
"spark-driver-controller": &api.ReplicationController{},
"spark-master-controller": &api.ReplicationController{},
"spark-master-service": &api.Service{}, "spark-master-service": &api.Service{},
"spark-master": &api.Pod{}, "spark-webui": &api.Service{},
"spark-worker-controller": &api.ReplicationController{}, "spark-worker-controller": &api.ReplicationController{},
"spark-driver": &api.Pod{},
}, },
"../examples/spark/spark-gluster": { "../examples/spark/spark-gluster": {
"spark-master-service": &api.Service{}, "spark-master-service": &api.Service{},

View File

@ -57,60 +57,98 @@ instructions for your platform.
## Step One: Start your Master service ## Step One: Start your Master service
The Master [service](../../docs/user-guide/services.md) is the master (or head) service for a Spark The Master [service](../../docs/user-guide/services.md) is the master service
cluster. for a Spark cluster.
Use the [`examples/spark/spark-master.json`](spark-master.json) file to create a [pod](../../docs/user-guide/pods.md) running Use the
the Master service. [`examples/spark/spark-master-controller.yaml`](spark-master-controller.yaml)
file to create a
[replication controller](../../docs/user-guide/replication-controller.md)
running the Spark Master service.
```sh ```console
$ kubectl create -f examples/spark/spark-master.json $ kubectl create -f examples/spark/spark-master-controller.yaml
replicationcontrollers/spark-master-controller
``` ```
Then, use the [`examples/spark/spark-master-service.json`](spark-master-service.json) file to Then, use the
create a logical service endpoint that Spark workers can use to access [`examples/spark/spark-master-service.yaml`](spark-master-service.yaml) file to
the Master pod. create a logical service endpoint that Spark workers can use to access the
Master pod.
```sh ```console
$ kubectl create -f examples/spark/spark-master-service.json $ kubectl create -f examples/spark/spark-master-service.yaml
services/spark-master
```
Optionally, you can create a service for the Spark Master WebUI at this point as
well. If you are running on a cloud provider that supports it, this will create
an external load balancer and open a firewall to the Spark Master WebUI on the
cluster. **Note:** With the existing configuration, there is **ABSOLUTELY NO**
authentication on this WebUI. With slightly more work, it would be
straightforward to put an `nginx` proxy in front to password protect it.
```console
$ kubectl create -f examples/spark/spark-webui.yaml
services/spark-webui
``` ```
### Check to see if Master is running and accessible ### Check to see if Master is running and accessible
```sh ```console
$ kubectl get pods $ kubectl get pods
NAME READY STATUS RESTARTS AGE NAME READY STATUS RESTARTS AGE
[...] spark-master-controller-5u0q5 1/1 Running 0 8m
spark-master 1/1 Running 0 25s
``` ```
Check logs to see the status of the master. Check logs to see the status of the master. (Use the pod retrieved from the previous output.)
```sh ```sh
$ kubectl logs spark-master $ kubectl logs spark-master-controller-5u0q5
starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.5.1-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master-controller-g0oao.out
starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.4.0-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master.out Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /opt/spark-1.5.1-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.5.1-bin-hadoop2.6/lib/spark-assembly-1.5.1-hadoop2.6.0.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar -Xms1g -Xmx1g org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
Spark Command: /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java -cp /opt/spark-1.4.0-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar -Xms512m -Xmx512m -XX:MaxPermSize=128m org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
======================================== ========================================
15/06/26 14:01:49 INFO Master: Registered signal handlers for [TERM, HUP, INT] 15/10/27 21:25:05 INFO Master: Registered signal handlers for [TERM, HUP, INT]
15/06/26 14:01:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 15/10/27 21:25:05 INFO SecurityManager: Changing view acls to: root
15/06/26 14:01:51 INFO SecurityManager: Changing view acls to: root 15/10/27 21:25:05 INFO SecurityManager: Changing modify acls to: root
15/06/26 14:01:51 INFO SecurityManager: Changing modify acls to: root 15/10/27 21:25:05 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
15/06/26 14:01:51 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root) 15/10/27 21:25:06 INFO Slf4jLogger: Slf4jLogger started
15/06/26 14:01:51 INFO Slf4jLogger: Slf4jLogger started 15/10/27 21:25:06 INFO Remoting: Starting remoting
15/06/26 14:01:51 INFO Remoting: Starting remoting 15/10/27 21:25:06 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
15/06/26 14:01:52 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077] 15/10/27 21:25:06 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
15/06/26 14:01:52 INFO Utils: Successfully started service 'sparkMaster' on port 7077. 15/10/27 21:25:07 INFO Master: Starting Spark master at spark://spark-master:7077
15/06/26 14:01:52 INFO Utils: Successfully started service on port 6066. 15/10/27 21:25:07 INFO Master: Running Spark version 1.5.1
15/06/26 14:01:52 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066 15/10/27 21:25:07 INFO Utils: Successfully started service 'MasterUI' on port 8080.
15/06/26 14:01:52 INFO Master: Starting Spark master at spark://spark-master:7077 15/10/27 21:25:07 INFO MasterWebUI: Started MasterWebUI at http://spark-master:8080
15/06/26 14:01:52 INFO Master: Running Spark version 1.4.0 15/10/27 21:25:07 INFO Utils: Successfully started service on port 6066.
15/06/26 14:01:52 INFO Utils: Successfully started service 'MasterUI' on port 8080. 15/10/27 21:25:07 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
15/06/26 14:01:52 INFO MasterWebUI: Started MasterWebUI at http://10.244.2.34:8080 15/10/27 21:25:07 INFO Master: I have been elected leader! New state: ALIVE
15/06/26 14:01:53 INFO Master: I have been elected leader! New state: ALIVE
``` ```
If you created the Spark WebUI and waited sufficient time for the load balancer
to be created, the `spark-webui` service should look something like this:
```console
$ kubectl describe services/spark-webui
Name: spark-webui
Namespace: default
Labels: <none>
Selector: component=spark-master
Type: LoadBalancer
IP: 10.0.152.249
LoadBalancer Ingress: 104.197.147.190
Port: <unnamed> 8080/TCP
NodePort: <unnamed> 31141/TCP
Endpoints: 10.244.1.12:8080
Session Affinity: None
Events: [...]
```
You should now be able to visit `http://104.197.147.190:8080` and see the Spark
Master UI. *Note:* After workers connect, this UI has links to worker Web
UIs. The worker UI links do not work (the links attempt to connect to cluster
IPs).
## Step Two: Start your Spark workers ## Step Two: Start your Spark workers
The Spark workers do the heavy lifting in a Spark cluster. They The Spark workers do the heavy lifting in a Spark cluster. They
@ -119,96 +157,104 @@ program.
The Spark workers need the Master service to be running. The Spark workers need the Master service to be running.
Use the [`examples/spark/spark-worker-controller.json`](spark-worker-controller.json) file to create a Use the [`examples/spark/spark-worker-controller.yaml`](spark-worker-controller.yaml) file to create a
[replication controller](../../docs/user-guide/replication-controller.md) that manages the worker pods. [replication controller](../../docs/user-guide/replication-controller.md) that manages the worker pods.
```sh ```console
$ kubectl create -f examples/spark/spark-worker-controller.json $ kubectl create -f examples/spark/spark-worker-controller.yaml
``` ```
### Check to see if the workers are running ### Check to see if the workers are running
```sh If you launched the Spark WebUI, your workers should just appear in the UI when
$ kubectl get pods they're ready. (It may take a little bit to pull the images and launch the
NAME READY STATUS RESTARTS AGE pods.) You can also interrogate the status in the following way:
[...]
spark-master 1/1 Running 0 14m
spark-worker-controller-hifwi 1/1 Running 0 33s
spark-worker-controller-u40r2 1/1 Running 0 33s
spark-worker-controller-vpgyg 1/1 Running 0 33s
$ kubectl logs spark-master ```console
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
spark-master-controller-5u0q5 1/1 Running 0 25m
spark-worker-controller-e8otp 1/1 Running 0 6m
spark-worker-controller-fiivl 1/1 Running 0 6m
spark-worker-controller-ytc7o 1/1 Running 0 6m
$ kubectl logs spark-master-controller-5u0q5
[...] [...]
15/06/26 14:15:43 INFO Master: Registering worker 10.244.2.35:46199 with 1 cores, 2.6 GB RAM 15/10/26 18:20:14 INFO Master: Registering worker 10.244.1.13:53567 with 2 cores, 6.3 GB RAM
15/06/26 14:15:55 INFO Master: Registering worker 10.244.1.15:44839 with 1 cores, 2.6 GB RAM 15/10/26 18:20:14 INFO Master: Registering worker 10.244.2.7:46195 with 2 cores, 6.3 GB RAM
15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM 15/10/26 18:20:14 INFO Master: Registering worker 10.244.3.8:39926 with 2 cores, 6.3 GB RAM
``` ```
## Step Three: Start your Spark driver to launch jobs on your Spark cluster ## Step Three: Start your Spark driver to launch jobs on your Spark cluster
The Spark driver is used to launch jobs into Spark cluster. You can read more about it in The Spark driver is used to launch jobs into Spark cluster. You can read more about it in
[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html). [Spark architecture](https://spark.apache.org/docs/latest/cluster-overview.html).
```shell ```console
$ kubectl create -f examples/spark/spark-driver.json $ kubectl create -f examples/spark/spark-driver-controller.yaml
replicationcontrollers/spark-driver-controller
``` ```
The Spark driver needs the Master service to be running. The Spark driver needs the Master service to be running.
### Check to see if the driver is running ### Check to see if the driver is running
```shell ```console
$ kubectl get pods $ kubectl get pods -lcomponent=spark-driver
NAME READY REASON RESTARTS AGE NAME READY STATUS RESTARTS AGE
[...] spark-driver-controller-vwb9c 1/1 Running 0 1m
spark-master 1/1 Running 0 14m
spark-driver 1/1 Running 0 10m
``` ```
## Step Four: Do something with the cluster ## Step Four: Do something with the cluster
Use the kubectl exec to connect to Spark driver Use the kubectl exec to connect to Spark driver and run a pipeline.
``` ```console
$ kubectl exec spark-driver -it bash $ kubectl exec spark-driver-controller-vwb9c -it pyspark
root@spark-driver:/#
root@spark-driver:/# pyspark
Python 2.7.9 (default, Mar 1 2015, 12:57:24) Python 2.7.9 (default, Mar 1 2015, 12:57:24)
[GCC 4.9.2] on linux2 [GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information. Type "help", "copyright", "credits" or "license" for more information.
15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Welcome to Welcome to
____ __ ____ __
/ __/__ ___ _____/ /__ / __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/ _\ \/ _ \/ _ `/ __/ '_/
/__ / .__/\_,_/_/ /_/\_\ version 1.4.0 /__ / .__/\_,_/_/ /_/\_\ version 1.5.1
/_/ /_/
Using Python version 2.7.9 (default, Mar 1 2015 12:57:24) Using Python version 2.7.9 (default, Mar 1 2015 12:57:24)
SparkContext available as sc, HiveContext available as sqlContext. SparkContext available as sc, HiveContext available as sqlContext.
>>> import socket >>> sc.textFile("gs://dataflow-samples/shakespeare/*").map(lambda s: len(s.split())).sum()
>>> sc.parallelize(range(1000)).map(lambda x:socket.gethostname()).distinct().collect() 939193
['spark-worker-controller-u40r2', 'spark-worker-controller-hifwi', 'spark-worker-controller-vpgyg']
``` ```
Congratulations, you just counted all of the words in all of the plays of
Shakespeare.
## Result ## Result
You now have services, replication controllers, and pods for the Spark master , Spark driver and Spark workers. You now have services and replication controllers for the Spark master, Spark
You can take this example to the next step and start using the Apache Spark cluster workers and Spark driver. You can take this example to the next step and start
you just created, see [Spark documentation](https://spark.apache.org/documentation.html) using the Apache Spark cluster you just created, see
for more information. [Spark documentation](https://spark.apache.org/documentation.html) for more
information.
## tl;dr ## tl;dr
```kubectl create -f spark-master.json``` ```console
kubectl create -f examples/spark/spark-master-controller.yaml
kubectl create -f examples/spark/spark-master-service.yaml
kubectl create -f examples/spark/spark-webui.yaml
kubectl create -f examples/spark/spark-worker-controller.yaml
kubectl create -f examples/spark/spark-driver-controller.yaml
```
```kubectl create -f spark-master-service.json``` After it's setup:
Make sure the Master Pod is running (use: ```kubectl get pods```).
```kubectl create -f spark-worker-controller.json```
```kubectl create -f spark-driver.json```
```console
kubectl get pods # Make sure everything is running
kubectl get services spark-webui # Get the IP of the Spark WebUI
kubectl get pods -lcomponent=spark-driver # Get the driver pod to interact with.
```
<!-- BEGIN MUNGE: GENERATED_ANALYTICS --> <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]() [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()

View File

@ -0,0 +1,29 @@
all: push
# To bump the Spark version, bump the version in base/Dockerfile, bump
# this tag and reset to v1. You should also double check the native
# Hadoop libs at that point (we grab the 2.6.1 libs, which are
# appropriate for 1.5.1-with-2.6).
TAG = 1.5.1_v1
containers:
docker build -t gcr.io/google_containers/spark-base base
docker tag gcr.io/google_containers/spark-base gcr.io/google_containers/spark-base:$(TAG)
docker build -t gcr.io/google_containers/spark-worker worker
docker tag gcr.io/google_containers/spark-worker gcr.io/google_containers/spark-worker:$(TAG)
docker build -t gcr.io/google_containers/spark-master master
docker tag gcr.io/google_containers/spark-master gcr.io/google_containers/spark-master:$(TAG)
docker build -t gcr.io/google_containers/spark-driver driver
docker tag gcr.io/google_containers/spark-driver gcr.io/google_containers/spark-driver:$(TAG)
push: containers
gcloud docker push gcr.io/google_containers/spark-base
gcloud docker push gcr.io/google_containers/spark-base:$(TAG)
gcloud docker push gcr.io/google_containers/spark-worker
gcloud docker push gcr.io/google_containers/spark-worker:$(TAG)
gcloud docker push gcr.io/google_containers/spark-master
gcloud docker push gcr.io/google_containers/spark-master:$(TAG)
gcloud docker push gcr.io/google_containers/spark-driver
gcloud docker push gcr.io/google_containers/spark-driver:$(TAG)
clean:

View File

@ -3,15 +3,35 @@ FROM java:latest
RUN apt-get update -y RUN apt-get update -y
RUN apt-get install -y scala RUN apt-get install -y scala
# Get Spark from some apache mirror. ENV hadoop_ver 2.6.1
ENV spark_ver 1.5.1
# Get Hadoop from US Apache mirror and extract just the native
# libs. (Until we care about running HDFS with these containers, this
# is all we need.)
RUN mkdir -p /opt && \ RUN mkdir -p /opt && \
cd /opt && \ cd /opt && \
wget http://apache.mirrors.pair.com/spark/spark-1.4.0/spark-1.4.0-bin-hadoop2.6.tgz && \ wget http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \
tar -zvxf spark-1.4.0-bin-hadoop2.6.tgz && \ tar -zvxf hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \
rm spark-1.4.0-bin-hadoop2.6.tgz && \ rm hadoop-${hadoop_ver}.tar.gz && \
ln -s spark-1.4.0-bin-hadoop2.6 spark && \ ln -s hadoop-${hadoop_ver} hadoop && \
echo Spark installed in /opt echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native
# Get Spark from US Apache mirror.
RUN mkdir -p /opt && \
cd /opt && \
wget http://www.us.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop2.6.tgz && \
tar -zvxf spark-${spark_ver}-bin-hadoop2.6.tgz && \
rm spark-${spark_ver}-bin-hadoop2.6.tgz && \
ln -s spark-${spark_ver}-bin-hadoop2.6 spark && \
echo Spark ${spark_ver} installed in /opt
# Add the GCS connector.
RUN wget -O /opt/spark/lib/gcs-connector-latest-hadoop2.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar
ADD log4j.properties /opt/spark/conf/log4j.properties ADD log4j.properties /opt/spark/conf/log4j.properties
ADD setup_client.sh / ADD setup_client.sh /
ADD start-common.sh /
ADD core-site.xml /opt/spark/conf/core-site.xml
ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
ENV PATH $PATH:/opt/spark/bin ENV PATH $PATH:/opt/spark/bin

View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.gs.impl</name>
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
<description>The FileSystem for gs: (GCS) uris.</description>
</property>
<property>
<name>fs.AbstractFileSystem.gs.impl</name>
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
<description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
</property>
<property>
<name>fs.gs.project.id</name>
<value>NOT_RUNNING_INSIDE_GCE</value>
</property>
</configuration>

View File

@ -0,0 +1,5 @@
spark.master spark://spark-master:7077
spark.executor.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraLibraryPath /opt/hadoop/lib/native
spark.app.id KubernetesSpark

View File

@ -0,0 +1,30 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [[ -n "${PROJECT_ID}" ]]; then
sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
fi
# We don't want any of the incoming service variables, we'd rather use
# DNS. But this one interferes directly with Spark.
unset SPARK_MASTER_PORT
# spark.{executor,driver}.extraLibraryPath don't actually seem to
# work, this seems to be the only reliable way to get the native libs
# picked up.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native

View File

@ -1,3 +1,3 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1 FROM gcr.io/google_containers/spark-base:latest
ADD start.sh /start.sh ADD start.sh /start.sh
CMD ["/start.sh"] CMD ["/start.sh"]

View File

@ -14,9 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts . /start-common.sh
echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
echo "Use kubectl exec spark-driver -it bash to invoke commands" echo "Use kubectl exec spark-driver -it bash to invoke commands"
while true; do while true; do
sleep 100 sleep 100

View File

@ -1,7 +1,7 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1 FROM gcr.io/google_containers/spark-base:latest
ADD start.sh / ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties ADD log4j.properties /opt/spark/conf/log4j.properties
EXPOSE 7077 EXPOSE 7077 8080
ENTRYPOINT ["/start.sh"] ENTRYPOINT ["/start.sh"]

View File

@ -14,6 +14,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
export SPARK_MASTER_PORT=${SPARK_MASTER_SERVICE_PORT:-7077} . /start-common.sh
echo "$(hostname -i) spark-master" >> /etc/hosts
export SPARK_LOCAL_HOSTNAME=spark-master
export SPARK_MASTER_IP=spark-master
/opt/spark/sbin/start-master.sh /opt/spark/sbin/start-master.sh
tail -F /opt/spark/logs/* tail -F /opt/spark/logs/*

View File

@ -1,7 +1,7 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1 FROM gcr.io/google_containers/spark-base:latest
ADD start.sh / ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties ADD log4j.properties /opt/spark/conf/log4j.properties
EXPOSE 8080 EXPOSE 8080
ENTRYPOINT ["/start.sh"] ENTRYPOINT ["/start.sh"]

View File

@ -14,15 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
if [[ ${SPARK_MASTER_SERVICE_HOST} == "" ]]; then . /start-common.sh
echo "Spark Master service must be created before starting any workers"
sleep 30 # To postpone pod restart
exit 1
fi
echo "${SPARK_MASTER_SERVICE_HOST} spark-master" >> /etc/hosts /opt/spark/sbin/start-slave.sh spark://spark-master:7077
export SPARK_LOCAL_HOSTNAME=$(hostname -i)
/opt/spark/sbin/start-slave.sh spark://spark-master:${SPARK_MASTER_SERVICE_PORT}
tail -F /opt/spark/logs/* tail -F /opt/spark/logs/*

View File

@ -0,0 +1,21 @@
kind: ReplicationController
apiVersion: v1
metadata:
name: spark-driver-controller
labels:
component: spark-driver
spec:
replicas: 1
selector:
component: spark-driver
template:
metadata:
labels:
component: spark-driver
spec:
containers:
- name: spark-driver
image: gcr.io/google_containers/spark-driver:1.5.1_v1
resources:
requests:
cpu: 100m

View File

@ -1,23 +0,0 @@
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "spark-driver",
"labels": {
"name": "spark-driver"
}
},
"spec": {
"containers": [
{
"name": "spark-driver",
"image": "gcr.io/google_containers/spark-driver:1.4.0_v1",
"resources": {
"limits": {
"cpu": "100m"
}
}
}
]
}
}

View File

@ -3,7 +3,7 @@ apiVersion: v1
metadata: metadata:
name: spark-master-controller name: spark-master-controller
labels: labels:
component: spark-master-controller component: spark-master
spec: spec:
replicas: 1 replicas: 1
selector: selector:
@ -15,14 +15,14 @@ spec:
spec: spec:
containers: containers:
- name: spark-master - name: spark-master
image: gcr.io/google_containers/spark-master image: gcr.io/google_containers/spark-master:1.5.1_v1
ports: ports:
- containerPort: 7077 - containerPort: 7077
volumeMounts: volumeMounts:
- mountPath: /mnt/glusterfs - mountPath: /mnt/glusterfs
name: glusterfsvol name: glusterfsvol
resources: resources:
limits: requests:
cpu: 100m cpu: 100m
volumes: volumes:
- name: glusterfsvol - name: glusterfsvol

View File

@ -9,4 +9,4 @@ spec:
- port: 7077 - port: 7077
targetPort: 7077 targetPort: 7077
selector: selector:
component: spark-master-controller component: spark-master

View File

@ -12,18 +12,18 @@ spec:
metadata: metadata:
labels: labels:
component: spark-worker component: spark-worker
uses: spark-master-controller uses: spark-master
spec: spec:
containers: containers:
- name: spark-worker - name: spark-worker
image: gcr.io/google_containers/spark-worker image: gcr.io/google_containers/spark-worker:1.5.1_v1
ports: ports:
- containerPort: 8888 - containerPort: 8888
volumeMounts: volumeMounts:
- mountPath: /mnt/glusterfs - mountPath: /mnt/glusterfs
name: glusterfsvol name: glusterfsvol
resources: resources:
limits: requests:
cpu: 100m cpu: 100m
volumes: volumes:
- name: glusterfsvol - name: glusterfsvol

View File

@ -0,0 +1,24 @@
kind: ReplicationController
apiVersion: v1
metadata:
name: spark-master-controller
labels:
component: spark-master
spec:
replicas: 1
selector:
component: spark-master
template:
metadata:
labels:
component: spark-master
spec:
containers:
- name: spark-master
image: gcr.io/google_containers/spark-master:1.5.1_v1
ports:
- containerPort: 7077
- containerPort: 8080
resources:
requests:
cpu: 100m

View File

@ -1,21 +0,0 @@
{
"kind": "Service",
"apiVersion": "v1",
"metadata": {
"name": "spark-master",
"labels": {
"name": "spark-master"
}
},
"spec": {
"ports": [
{
"port": 7077,
"targetPort": 7077
}
],
"selector": {
"name": "spark-master"
}
}
}

View File

@ -0,0 +1,12 @@
kind: Service
apiVersion: v1
metadata:
name: spark-master
labels:
component: spark-master-service
spec:
ports:
- port: 7077
targetPort: 7077
selector:
component: spark-master

View File

@ -1,28 +0,0 @@
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "spark-master",
"labels": {
"name": "spark-master"
}
},
"spec": {
"containers": [
{
"name": "spark-master",
"image": "gcr.io/google_containers/spark-master:1.4.0_v1",
"ports": [
{
"containerPort": 7077
}
],
"resources": {
"limits": {
"cpu": "100m"
}
}
}
]
}
}

View File

@ -0,0 +1,11 @@
kind: Service
apiVersion: v1
metadata:
name: spark-webui
spec:
ports:
- port: 8080
targetPort: 8080
selector:
component: spark-master
type: LoadBalancer

View File

@ -1,43 +0,0 @@
{
"kind": "ReplicationController",
"apiVersion": "v1",
"metadata": {
"name": "spark-worker-controller",
"labels": {
"name": "spark-worker"
}
},
"spec": {
"replicas": 3,
"selector": {
"name": "spark-worker"
},
"template": {
"metadata": {
"labels": {
"name": "spark-worker",
"uses": "spark-master"
}
},
"spec": {
"containers": [
{
"name": "spark-worker",
"image": "gcr.io/google_containers/spark-worker:1.4.0_v1",
"ports": [
{
"hostPort": 8888,
"containerPort": 8888
}
],
"resources": {
"limits": {
"cpu": "100m"
}
}
}
]
}
}
}
}

View File

@ -0,0 +1,24 @@
kind: ReplicationController
apiVersion: v1
metadata:
name: spark-worker-controller
labels:
component: spark-worker
spec:
replicas: 3
selector:
component: spark-worker
template:
metadata:
labels:
component: spark-worker
uses: spark-master
spec:
containers:
- name: spark-worker
image: gcr.io/google_containers/spark-worker:1.5.1_v1
ports:
- containerPort: 8888
resources:
requests:
cpu: 100m