mirror of https://github.com/k3s-io/k3s
commit 63c512cfed

@@ -356,10 +356,11 @@ func TestExampleObjectSchemas(t *testing.T) {
		"secret": &api.Secret{},
	},
	"../examples/spark": {
		"spark-driver-controller": &api.ReplicationController{},
		"spark-master-controller": &api.ReplicationController{},
		"spark-master-service": &api.Service{},
		"spark-master": &api.Pod{},
		"spark-webui": &api.Service{},
		"spark-worker-controller": &api.ReplicationController{},
		"spark-driver": &api.Pod{},
	},
	"../examples/spark/spark-gluster": {
		"spark-master-service": &api.Service{},

@@ -57,60 +57,98 @@ instructions for your platform.

## Step One: Start your Master service

The Master [service](../../docs/user-guide/services.md) is the master (or head) service for a Spark
cluster.
The Master [service](../../docs/user-guide/services.md) is the master service
for a Spark cluster.

Use the [`examples/spark/spark-master.json`](spark-master.json) file to create a [pod](../../docs/user-guide/pods.md) running
the Master service.
Use the
[`examples/spark/spark-master-controller.yaml`](spark-master-controller.yaml)
file to create a
[replication controller](../../docs/user-guide/replication-controller.md)
running the Spark Master service.

```sh
$ kubectl create -f examples/spark/spark-master.json
```console
$ kubectl create -f examples/spark/spark-master-controller.yaml
replicationcontrollers/spark-master-controller
```

Then, use the [`examples/spark/spark-master-service.json`](spark-master-service.json) file to
create a logical service endpoint that Spark workers can use to access
the Master pod.
Then, use the
[`examples/spark/spark-master-service.yaml`](spark-master-service.yaml) file to
create a logical service endpoint that Spark workers can use to access the
Master pod.

```sh
$ kubectl create -f examples/spark/spark-master-service.json
```console
$ kubectl create -f examples/spark/spark-master-service.yaml
services/spark-master
```

Optionally, you can create a service for the Spark Master WebUI at this point as
well. If you are running on a cloud provider that supports it, this will create
an external load balancer and open a firewall to the Spark Master WebUI on the
cluster. **Note:** With the existing configuration, there is **ABSOLUTELY NO**
authentication on this WebUI. With slightly more work, it would be
straightforward to put an `nginx` proxy in front to password protect it.

```console
$ kubectl create -f examples/spark/spark-webui.yaml
services/spark-webui
```

### Check to see if Master is running and accessible

```sh
```console
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
[...]
spark-master 1/1 Running 0 25s

NAME READY STATUS RESTARTS AGE
spark-master-controller-5u0q5 1/1 Running 0 8m
```

Check logs to see the status of the master.
Check logs to see the status of the master. (Use the pod retrieved from the previous output.)

```sh
$ kubectl logs spark-master

starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.4.0-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master.out
Spark Command: /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java -cp /opt/spark-1.4.0-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar -Xms512m -Xmx512m -XX:MaxPermSize=128m org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
$ kubectl logs spark-master-controller-5u0q5
starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.5.1-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master-controller-g0oao.out
Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /opt/spark-1.5.1-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.5.1-bin-hadoop2.6/lib/spark-assembly-1.5.1-hadoop2.6.0.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar -Xms1g -Xmx1g org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
========================================
15/06/26 14:01:49 INFO Master: Registered signal handlers for [TERM, HUP, INT]
15/06/26 14:01:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/06/26 14:01:51 INFO SecurityManager: Changing view acls to: root
15/06/26 14:01:51 INFO SecurityManager: Changing modify acls to: root
15/06/26 14:01:51 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
15/06/26 14:01:51 INFO Slf4jLogger: Slf4jLogger started
15/06/26 14:01:51 INFO Remoting: Starting remoting
15/06/26 14:01:52 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
15/06/26 14:01:52 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
15/06/26 14:01:52 INFO Utils: Successfully started service on port 6066.
15/06/26 14:01:52 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
15/06/26 14:01:52 INFO Master: Starting Spark master at spark://spark-master:7077
15/06/26 14:01:52 INFO Master: Running Spark version 1.4.0
15/06/26 14:01:52 INFO Utils: Successfully started service 'MasterUI' on port 8080.
15/06/26 14:01:52 INFO MasterWebUI: Started MasterWebUI at http://10.244.2.34:8080
15/06/26 14:01:53 INFO Master: I have been elected leader! New state: ALIVE
15/10/27 21:25:05 INFO Master: Registered signal handlers for [TERM, HUP, INT]
15/10/27 21:25:05 INFO SecurityManager: Changing view acls to: root
15/10/27 21:25:05 INFO SecurityManager: Changing modify acls to: root
15/10/27 21:25:05 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
15/10/27 21:25:06 INFO Slf4jLogger: Slf4jLogger started
15/10/27 21:25:06 INFO Remoting: Starting remoting
15/10/27 21:25:06 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
15/10/27 21:25:06 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
15/10/27 21:25:07 INFO Master: Starting Spark master at spark://spark-master:7077
15/10/27 21:25:07 INFO Master: Running Spark version 1.5.1
15/10/27 21:25:07 INFO Utils: Successfully started service 'MasterUI' on port 8080.
15/10/27 21:25:07 INFO MasterWebUI: Started MasterWebUI at http://spark-master:8080
15/10/27 21:25:07 INFO Utils: Successfully started service on port 6066.
15/10/27 21:25:07 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
15/10/27 21:25:07 INFO Master: I have been elected leader! New state: ALIVE
```

If you created the Spark WebUI and waited sufficient time for the load balancer
to be created, the `spark-webui` service should look something like this:

```console
$ kubectl describe services/spark-webui
Name: spark-webui
Namespace: default
Labels: <none>
Selector: component=spark-master
Type: LoadBalancer
IP: 10.0.152.249
LoadBalancer Ingress: 104.197.147.190
Port: <unnamed> 8080/TCP
NodePort: <unnamed> 31141/TCP
Endpoints: 10.244.1.12:8080
Session Affinity: None
Events: [...]
```

You should now be able to visit `http://104.197.147.190:8080` and see the Spark
Master UI. *Note:* After workers connect, this UI has links to worker Web
UIs. The worker UI links do not work (the links attempt to connect to cluster
IPs).
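
One possible workaround, sketched here under the assumption that the worker UI is
served on the `containerPort` 8888 that the worker controller exposes, is to tunnel
to a single worker pod with `kubectl port-forward` and browse it via `localhost`:

```console
$ kubectl get pods -lcomponent=spark-worker                     # pick any worker pod name
$ kubectl port-forward spark-worker-controller-e8otp 8888:8888  # assumes the worker UI is on 8888
```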

## Step Two: Start your Spark workers

The Spark workers do the heavy lifting in a Spark cluster. They

@@ -119,96 +157,104 @@ program.

The Spark workers need the Master service to be running.

Use the [`examples/spark/spark-worker-controller.json`](spark-worker-controller.json) file to create a
Use the [`examples/spark/spark-worker-controller.yaml`](spark-worker-controller.yaml) file to create a
[replication controller](../../docs/user-guide/replication-controller.md) that manages the worker pods.

```sh
$ kubectl create -f examples/spark/spark-worker-controller.json
```console
$ kubectl create -f examples/spark/spark-worker-controller.yaml
```

### Check to see if the workers are running

```sh
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
[...]
spark-master 1/1 Running 0 14m
spark-worker-controller-hifwi 1/1 Running 0 33s
spark-worker-controller-u40r2 1/1 Running 0 33s
spark-worker-controller-vpgyg 1/1 Running 0 33s
If you launched the Spark WebUI, your workers should just appear in the UI when
they're ready. (It may take a little bit to pull the images and launch the
pods.) You can also interrogate the status in the following way:

$ kubectl logs spark-master
```console
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
spark-master-controller-5u0q5 1/1 Running 0 25m
spark-worker-controller-e8otp 1/1 Running 0 6m
spark-worker-controller-fiivl 1/1 Running 0 6m
spark-worker-controller-ytc7o 1/1 Running 0 6m

$ kubectl logs spark-master-controller-5u0q5
[...]
15/06/26 14:15:43 INFO Master: Registering worker 10.244.2.35:46199 with 1 cores, 2.6 GB RAM
15/06/26 14:15:55 INFO Master: Registering worker 10.244.1.15:44839 with 1 cores, 2.6 GB RAM
15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
15/10/26 18:20:14 INFO Master: Registering worker 10.244.1.13:53567 with 2 cores, 6.3 GB RAM
15/10/26 18:20:14 INFO Master: Registering worker 10.244.2.7:46195 with 2 cores, 6.3 GB RAM
15/10/26 18:20:14 INFO Master: Registering worker 10.244.3.8:39926 with 2 cores, 6.3 GB RAM
```

## Step Three: Start your Spark driver to launch jobs on your Spark cluster

The Spark driver is used to launch jobs into the Spark cluster. You can read more about it in
[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).
[Spark architecture](https://spark.apache.org/docs/latest/cluster-overview.html).

```shell
$ kubectl create -f examples/spark/spark-driver.json
```console
$ kubectl create -f examples/spark/spark-driver-controller.yaml
replicationcontrollers/spark-driver-controller
```

The Spark driver needs the Master service to be running.

### Check to see if the driver is running

```shell
$ kubectl get pods
NAME READY REASON RESTARTS AGE
[...]
spark-master 1/1 Running 0 14m
spark-driver 1/1 Running 0 10m
```console
$ kubectl get pods -lcomponent=spark-driver
NAME READY STATUS RESTARTS AGE
spark-driver-controller-vwb9c 1/1 Running 0 1m
```

## Step Four: Do something with the cluster

Use the kubectl exec to connect to Spark driver
Use `kubectl exec` to connect to the Spark driver and run a pipeline.

```
$ kubectl exec spark-driver -it bash
root@spark-driver:/#
root@spark-driver:/# pyspark
```console
$ kubectl exec spark-driver-controller-vwb9c -it pyspark
Python 2.7.9 (default, Mar 1 2015, 12:57:24)
[GCC 4.9.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /__ / .__/\_,_/_/ /_/\_\   version 1.4.0
   /__ / .__/\_,_/_/ /_/\_\   version 1.5.1
      /_/

Using Python version 2.7.9 (default, Mar 1 2015 12:57:24)
SparkContext available as sc, HiveContext available as sqlContext.
>>> import socket
>>> sc.parallelize(range(1000)).map(lambda x:socket.gethostname()).distinct().collect()
['spark-worker-controller-u40r2', 'spark-worker-controller-hifwi', 'spark-worker-controller-vpgyg']
>>> sc.textFile("gs://dataflow-samples/shakespeare/*").map(lambda s: len(s.split())).sum()
939193
```

Congratulations, you just counted all of the words in all of the plays of
Shakespeare.
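
If you would rather submit a batch job than drive the cluster interactively,
`spark-submit` is also on the `PATH` inside the driver image. A minimal sketch,
assuming the stock Spark 1.5.1 binary distribution is laid out under `/opt/spark`
(so the bundled examples jar sits at the path below):

```console
$ kubectl exec spark-driver-controller-vwb9c -- \
    spark-submit --class org.apache.spark.examples.SparkPi \
    /opt/spark/lib/spark-examples-1.5.1-hadoop2.6.0.jar 10
```

Because `spark.master` is set in `spark-defaults.conf`, the job goes to the same
standalone master that the `pyspark` session above used.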

## Result

You now have services, replication controllers, and pods for the Spark master, Spark driver and Spark workers.
You can take this example to the next step and start using the Apache Spark cluster
you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
for more information.
You now have services and replication controllers for the Spark master, Spark
workers and Spark driver. You can take this example to the next step and start
using the Apache Spark cluster you just created, see
[Spark documentation](https://spark.apache.org/documentation.html) for more
information.
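
When you are finished with the cluster, a teardown sketch using the resource names
from the files above:

```console
kubectl delete rc spark-master-controller spark-worker-controller spark-driver-controller
kubectl delete service spark-master spark-webui
```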

## tl;dr

```kubectl create -f spark-master.json```
```console
kubectl create -f examples/spark/spark-master-controller.yaml
kubectl create -f examples/spark/spark-master-service.yaml
kubectl create -f examples/spark/spark-webui.yaml
kubectl create -f examples/spark/spark-worker-controller.yaml
kubectl create -f examples/spark/spark-driver-controller.yaml
```

```kubectl create -f spark-master-service.json```

Make sure the Master Pod is running (use: ```kubectl get pods```).

```kubectl create -f spark-worker-controller.json```

```kubectl create -f spark-driver.json```
After it's set up:

```console
kubectl get pods # Make sure everything is running
kubectl get services spark-webui # Get the IP of the Spark WebUI
kubectl get pods -lcomponent=spark-driver # Get the driver pod to interact with.
```

<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()

@@ -0,0 +1,29 @@
all: push

# To bump the Spark version, bump the version in base/Dockerfile, bump
# this tag and reset to v1. You should also double check the native
# Hadoop libs at that point (we grab the 2.6.1 libs, which are
# appropriate for 1.5.1-with-2.6).
TAG = 1.5.1_v1

containers:
	docker build -t gcr.io/google_containers/spark-base base
	docker tag gcr.io/google_containers/spark-base gcr.io/google_containers/spark-base:$(TAG)
	docker build -t gcr.io/google_containers/spark-worker worker
	docker tag gcr.io/google_containers/spark-worker gcr.io/google_containers/spark-worker:$(TAG)
	docker build -t gcr.io/google_containers/spark-master master
	docker tag gcr.io/google_containers/spark-master gcr.io/google_containers/spark-master:$(TAG)
	docker build -t gcr.io/google_containers/spark-driver driver
	docker tag gcr.io/google_containers/spark-driver gcr.io/google_containers/spark-driver:$(TAG)

push: containers
	gcloud docker push gcr.io/google_containers/spark-base
	gcloud docker push gcr.io/google_containers/spark-base:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-worker
	gcloud docker push gcr.io/google_containers/spark-worker:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-master
	gcloud docker push gcr.io/google_containers/spark-master:$(TAG)
	gcloud docker push gcr.io/google_containers/spark-driver
	gcloud docker push gcr.io/google_containers/spark-driver:$(TAG)

clean:
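
A typical invocation sketch for this Makefile: `make push` builds all four images via
the `containers` target and then pushes them; the `TAG` override shown is hypothetical
and follows the version-bump procedure described in the comment above.

```console
make push                 # build spark-base, spark-worker, spark-master, spark-driver and push them
make push TAG=1.5.2_v1    # hypothetical tag override after bumping the Spark version
```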

@@ -3,15 +3,35 @@ FROM java:latest
RUN apt-get update -y
RUN apt-get install -y scala

# Get Spark from some apache mirror.
ENV hadoop_ver 2.6.1
ENV spark_ver 1.5.1

# Get Hadoop from US Apache mirror and extract just the native
# libs. (Until we care about running HDFS with these containers, this
# is all we need.)
RUN mkdir -p /opt && \
    cd /opt && \
    wget http://apache.mirrors.pair.com/spark/spark-1.4.0/spark-1.4.0-bin-hadoop2.6.tgz && \
    tar -zvxf spark-1.4.0-bin-hadoop2.6.tgz && \
    rm spark-1.4.0-bin-hadoop2.6.tgz && \
    ln -s spark-1.4.0-bin-hadoop2.6 spark && \
    echo Spark installed in /opt
    wget http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \
    tar -zvxf hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \
    rm hadoop-${hadoop_ver}.tar.gz && \
    ln -s hadoop-${hadoop_ver} hadoop && \
    echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native

# Get Spark from US Apache mirror.
RUN mkdir -p /opt && \
    cd /opt && \
    wget http://www.us.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop2.6.tgz && \
    tar -zvxf spark-${spark_ver}-bin-hadoop2.6.tgz && \
    rm spark-${spark_ver}-bin-hadoop2.6.tgz && \
    ln -s spark-${spark_ver}-bin-hadoop2.6 spark && \
    echo Spark ${spark_ver} installed in /opt

# Add the GCS connector.
RUN wget -O /opt/spark/lib/gcs-connector-latest-hadoop2.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar

ADD log4j.properties /opt/spark/conf/log4j.properties
ADD setup_client.sh /
ADD start-common.sh /
ADD core-site.xml /opt/spark/conf/core-site.xml
ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
ENV PATH $PATH:/opt/spark/bin

@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>fs.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
    <description>The FileSystem for gs: (GCS) uris.</description>
  </property>
  <property>
    <name>fs.AbstractFileSystem.gs.impl</name>
    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
  </property>
  <property>
    <name>fs.gs.project.id</name>
    <value>NOT_RUNNING_INSIDE_GCE</value>
  </property>
</configuration>

@@ -0,0 +1,5 @@
spark.master spark://spark-master:7077
spark.executor.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraClassPath /opt/spark/lib/gcs-connector-latest-hadoop2.jar
spark.driver.extraLibraryPath /opt/hadoop/lib/native
spark.app.id KubernetesSpark

@@ -0,0 +1,30 @@
#!/bin/bash

# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)

if [[ -n "${PROJECT_ID}" ]]; then
  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
fi

# We don't want any of the incoming service variables, we'd rather use
# DNS. But this one interferes directly with Spark.
unset SPARK_MASTER_PORT

# spark.{executor,driver}.extraLibraryPath don't actually seem to
# work, this seems to be the only reliable way to get the native libs
# picked up.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native

@@ -1,3 +1,3 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1
FROM gcr.io/google_containers/spark-base:latest
ADD start.sh /start.sh
CMD ["/start.sh"]

@@ -14,9 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
. /start-common.sh

echo "Use kubectl exec spark-driver -it bash to invoke commands"
while true; do
  sleep 100

@@ -1,7 +1,7 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1
FROM gcr.io/google_containers/spark-base:latest

ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties
EXPOSE 7077
EXPOSE 7077 8080

ENTRYPOINT ["/start.sh"]

@@ -14,6 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

export SPARK_MASTER_PORT=${SPARK_MASTER_SERVICE_PORT:-7077}
. /start-common.sh

echo "$(hostname -i) spark-master" >> /etc/hosts
export SPARK_LOCAL_HOSTNAME=spark-master
export SPARK_MASTER_IP=spark-master

/opt/spark/sbin/start-master.sh
tail -F /opt/spark/logs/*

@@ -1,7 +1,7 @@
FROM gcr.io/google_containers/spark-base:1.4.0_v1
FROM gcr.io/google_containers/spark-base:latest

ADD start.sh /
ADD log4j.properties /opt/spark/conf/log4j.properties

EXPOSE 8080

ENTRYPOINT ["/start.sh"]

@@ -14,15 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

if [[ ${SPARK_MASTER_SERVICE_HOST} == "" ]]; then
  echo "Spark Master service must be created before starting any workers"
  sleep 30 # To postpone pod restart
  exit 1
fi
. /start-common.sh

echo "${SPARK_MASTER_SERVICE_HOST} spark-master" >> /etc/hosts
export SPARK_LOCAL_HOSTNAME=$(hostname -i)

/opt/spark/sbin/start-slave.sh spark://spark-master:${SPARK_MASTER_SERVICE_PORT}
/opt/spark/sbin/start-slave.sh spark://spark-master:7077

tail -F /opt/spark/logs/*

@@ -0,0 +1,21 @@
kind: ReplicationController
apiVersion: v1
metadata:
  name: spark-driver-controller
  labels:
    component: spark-driver
spec:
  replicas: 1
  selector:
    component: spark-driver
  template:
    metadata:
      labels:
        component: spark-driver
    spec:
      containers:
        - name: spark-driver
          image: gcr.io/google_containers/spark-driver:1.5.1_v1
          resources:
            requests:
              cpu: 100m

@@ -1,23 +0,0 @@
{
  "kind": "Pod",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-driver",
    "labels": {
      "name": "spark-driver"
    }
  },
  "spec": {
    "containers": [
      {
        "name": "spark-driver",
        "image": "gcr.io/google_containers/spark-driver:1.4.0_v1",
        "resources": {
          "limits": {
            "cpu": "100m"
          }
        }
      }
    ]
  }
}

@@ -3,7 +3,7 @@ apiVersion: v1
metadata:
  name: spark-master-controller
  labels:
    component: spark-master-controller
    component: spark-master
spec:
  replicas: 1
  selector:

@@ -15,14 +15,14 @@ spec:
    spec:
      containers:
        - name: spark-master
          image: gcr.io/google_containers/spark-master
          image: gcr.io/google_containers/spark-master:1.5.1_v1
          ports:
            - containerPort: 7077
          volumeMounts:
            - mountPath: /mnt/glusterfs
              name: glusterfsvol
          resources:
            limits:
            requests:
              cpu: 100m
      volumes:
        - name: glusterfsvol

@@ -9,4 +9,4 @@ spec:
    - port: 7077
      targetPort: 7077
  selector:
    component: spark-master-controller
    component: spark-master

@@ -12,18 +12,18 @@ spec:
    metadata:
      labels:
        component: spark-worker
        uses: spark-master-controller
        uses: spark-master
    spec:
      containers:
        - name: spark-worker
          image: gcr.io/google_containers/spark-worker
          image: gcr.io/google_containers/spark-worker:1.5.1_v1
          ports:
            - containerPort: 8888
          volumeMounts:
            - mountPath: /mnt/glusterfs
              name: glusterfsvol
          resources:
            limits:
            requests:
              cpu: 100m
      volumes:
        - name: glusterfsvol

@@ -0,0 +1,24 @@
kind: ReplicationController
apiVersion: v1
metadata:
  name: spark-master-controller
  labels:
    component: spark-master
spec:
  replicas: 1
  selector:
    component: spark-master
  template:
    metadata:
      labels:
        component: spark-master
    spec:
      containers:
        - name: spark-master
          image: gcr.io/google_containers/spark-master:1.5.1_v1
          ports:
            - containerPort: 7077
            - containerPort: 8080
          resources:
            requests:
              cpu: 100m

@@ -1,21 +0,0 @@
{
  "kind": "Service",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-master",
    "labels": {
      "name": "spark-master"
    }
  },
  "spec": {
    "ports": [
      {
        "port": 7077,
        "targetPort": 7077
      }
    ],
    "selector": {
      "name": "spark-master"
    }
  }
}

@@ -0,0 +1,12 @@
kind: Service
apiVersion: v1
metadata:
  name: spark-master
  labels:
    component: spark-master-service
spec:
  ports:
    - port: 7077
      targetPort: 7077
  selector:
    component: spark-master

@@ -1,28 +0,0 @@
{
  "kind": "Pod",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-master",
    "labels": {
      "name": "spark-master"
    }
  },
  "spec": {
    "containers": [
      {
        "name": "spark-master",
        "image": "gcr.io/google_containers/spark-master:1.4.0_v1",
        "ports": [
          {
            "containerPort": 7077
          }
        ],
        "resources": {
          "limits": {
            "cpu": "100m"
          }
        }
      }
    ]
  }
}

@@ -0,0 +1,11 @@
kind: Service
apiVersion: v1
metadata:
  name: spark-webui
spec:
  ports:
    - port: 8080
      targetPort: 8080
  selector:
    component: spark-master
  type: LoadBalancer

@@ -1,43 +0,0 @@
{
  "kind": "ReplicationController",
  "apiVersion": "v1",
  "metadata": {
    "name": "spark-worker-controller",
    "labels": {
      "name": "spark-worker"
    }
  },
  "spec": {
    "replicas": 3,
    "selector": {
      "name": "spark-worker"
    },
    "template": {
      "metadata": {
        "labels": {
          "name": "spark-worker",
          "uses": "spark-master"
        }
      },
      "spec": {
        "containers": [
          {
            "name": "spark-worker",
            "image": "gcr.io/google_containers/spark-worker:1.4.0_v1",
            "ports": [
              {
                "hostPort": 8888,
                "containerPort": 8888
              }
            ],
            "resources": {
              "limits": {
                "cpu": "100m"
              }
            }
          }
        ]
      }
    }
  }
}

@@ -0,0 +1,24 @@
kind: ReplicationController
apiVersion: v1
metadata:
  name: spark-worker-controller
  labels:
    component: spark-worker
spec:
  replicas: 3
  selector:
    component: spark-worker
  template:
    metadata:
      labels:
        component: spark-worker
        uses: spark-master
    spec:
      containers:
        - name: spark-worker
          image: gcr.io/google_containers/spark-worker:1.5.1_v1
          ports:
            - containerPort: 8888
          resources:
            requests:
              cpu: 100m