mirror of https://github.com/k3s-io/k3s
commit 63c512cfed
@@ -356,10 +356,11 @@ func TestExampleObjectSchemas(t *testing.T) {
 			"secret": &api.Secret{},
 		},
 		"../examples/spark": {
+			"spark-driver-controller": &api.ReplicationController{},
+			"spark-master-controller": &api.ReplicationController{},
 			"spark-master-service": &api.Service{},
-			"spark-master": &api.Pod{},
+			"spark-webui": &api.Service{},
 			"spark-worker-controller": &api.ReplicationController{},
-			"spark-driver": &api.Pod{},
 		},
 		"../examples/spark/spark-gluster": {
 			"spark-master-service": &api.Service{},

@@ -57,60 +57,98 @@ instructions for your platform.
 
 ## Step One: Start your Master service
 
-The Master [service](../../docs/user-guide/services.md) is the master (or head) service for a Spark
-cluster.
+The Master [service](../../docs/user-guide/services.md) is the master service
+for a Spark cluster.
 
-Use the [`examples/spark/spark-master.json`](spark-master.json) file to create a [pod](../../docs/user-guide/pods.md) running
-the Master service.
+Use the
+[`examples/spark/spark-master-controller.yaml`](spark-master-controller.yaml)
+file to create a
+[replication controller](../../docs/user-guide/replication-controller.md)
+running the Spark Master service.
 
-```sh
-$ kubectl create -f examples/spark/spark-master.json
+```console
+$ kubectl create -f examples/spark/spark-master-controller.yaml
+replicationcontrollers/spark-master-controller
 ```
 
-Then, use the [`examples/spark/spark-master-service.json`](spark-master-service.json) file to
-create a logical service endpoint that Spark workers can use to access
-the Master pod.
+Then, use the
+[`examples/spark/spark-master-service.yaml`](spark-master-service.yaml) file to
+create a logical service endpoint that Spark workers can use to access the
+Master pod.
 
-```sh
-$ kubectl create -f examples/spark/spark-master-service.json
+```console
+$ kubectl create -f examples/spark/spark-master-service.yaml
+services/spark-master
+```
+
+Optionally, you can create a service for the Spark Master WebUI at this point as
+well. If you are running on a cloud provider that supports it, this will create
+an external load balancer and open a firewall to the Spark Master WebUI on the
+cluster. **Note:** With the existing configuration, there is **ABSOLUTELY NO**
+authentication on this WebUI. With slightly more work, it would be
+straightforward to put an `nginx` proxy in front to password protect it.
+
+```console
+$ kubectl create -f examples/spark/spark-webui.yaml
+services/spark-webui
 ```
 
 ### Check to see if Master is running and accessible
 
-```sh
+```console
 $ kubectl get pods
 NAME                            READY     STATUS    RESTARTS   AGE
-[...]
-spark-master                    1/1       Running   0          25s
+spark-master-controller-5u0q5   1/1       Running   0          8m
 ```
 
-Check logs to see the status of the master.
+Check logs to see the status of the master. (Use the pod retrieved from the previous output.)
 
 ```sh
-$ kubectl logs spark-master
-starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.4.0-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master.out
-Spark Command: /usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java -cp /opt/spark-1.4.0-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.4.0-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar -Xms512m -Xmx512m -XX:MaxPermSize=128m org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
+$ kubectl logs spark-master-controller-5u0q5
+starting org.apache.spark.deploy.master.Master, logging to /opt/spark-1.5.1-bin-hadoop2.6/sbin/../logs/spark--org.apache.spark.deploy.master.Master-1-spark-master-controller-g0oao.out
+Spark Command: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -cp /opt/spark-1.5.1-bin-hadoop2.6/sbin/../conf/:/opt/spark-1.5.1-bin-hadoop2.6/lib/spark-assembly-1.5.1-hadoop2.6.0.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-rdbms-3.2.9.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-core-3.2.10.jar:/opt/spark-1.5.1-bin-hadoop2.6/lib/datanucleus-api-jdo-3.2.6.jar -Xms1g -Xmx1g org.apache.spark.deploy.master.Master --ip spark-master --port 7077 --webui-port 8080
 ========================================
-15/06/26 14:01:49 INFO Master: Registered signal handlers for [TERM, HUP, INT]
-15/06/26 14:01:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
-15/06/26 14:01:51 INFO SecurityManager: Changing view acls to: root
-15/06/26 14:01:51 INFO SecurityManager: Changing modify acls to: root
-15/06/26 14:01:51 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
-15/06/26 14:01:51 INFO Slf4jLogger: Slf4jLogger started
-15/06/26 14:01:51 INFO Remoting: Starting remoting
-15/06/26 14:01:52 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
-15/06/26 14:01:52 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
-15/06/26 14:01:52 INFO Utils: Successfully started service on port 6066.
-15/06/26 14:01:52 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
-15/06/26 14:01:52 INFO Master: Starting Spark master at spark://spark-master:7077
-15/06/26 14:01:52 INFO Master: Running Spark version 1.4.0
-15/06/26 14:01:52 INFO Utils: Successfully started service 'MasterUI' on port 8080.
-15/06/26 14:01:52 INFO MasterWebUI: Started MasterWebUI at http://10.244.2.34:8080
-15/06/26 14:01:53 INFO Master: I have been elected leader! New state: ALIVE
+15/10/27 21:25:05 INFO Master: Registered signal handlers for [TERM, HUP, INT]
+15/10/27 21:25:05 INFO SecurityManager: Changing view acls to: root
+15/10/27 21:25:05 INFO SecurityManager: Changing modify acls to: root
+15/10/27 21:25:05 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify permissions: Set(root)
+15/10/27 21:25:06 INFO Slf4jLogger: Slf4jLogger started
+15/10/27 21:25:06 INFO Remoting: Starting remoting
+15/10/27 21:25:06 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkMaster@spark-master:7077]
+15/10/27 21:25:06 INFO Utils: Successfully started service 'sparkMaster' on port 7077.
+15/10/27 21:25:07 INFO Master: Starting Spark master at spark://spark-master:7077
+15/10/27 21:25:07 INFO Master: Running Spark version 1.5.1
+15/10/27 21:25:07 INFO Utils: Successfully started service 'MasterUI' on port 8080.
+15/10/27 21:25:07 INFO MasterWebUI: Started MasterWebUI at http://spark-master:8080
+15/10/27 21:25:07 INFO Utils: Successfully started service on port 6066.
+15/10/27 21:25:07 INFO StandaloneRestServer: Started REST server for submitting applications on port 6066
+15/10/27 21:25:07 INFO Master: I have been elected leader! New state: ALIVE
 ```
 
+If you created the Spark WebUI and waited sufficient time for the load balancer
+to be created, the `spark-webui` service should look something like this:
+
+```console
+$ kubectl describe services/spark-webui
+Name:                   spark-webui
+Namespace:              default
+Labels:                 <none>
+Selector:               component=spark-master
+Type:                   LoadBalancer
+IP:                     10.0.152.249
+LoadBalancer Ingress:   104.197.147.190
+Port:                   <unnamed> 8080/TCP
+NodePort:               <unnamed> 31141/TCP
+Endpoints:              10.244.1.12:8080
+Session Affinity:       None
+Events:                 [...]
+```
+
+You should now be able to visit `http://104.197.147.190:8080` and see the Spark
+Master UI. *Note:* After workers connect, this UI has links to worker Web
+UIs. The worker UI links do not work (the links attempt to connect to cluster
+IPs).
+
 ## Step Two: Start your Spark workers
 
 The Spark workers do the heavy lifting in a Spark cluster. They
@@ -119,96 +157,104 @@ program.
 
 The Spark workers need the Master service to be running.
 
-Use the [`examples/spark/spark-worker-controller.json`](spark-worker-controller.json) file to create a
+Use the [`examples/spark/spark-worker-controller.yaml`](spark-worker-controller.yaml) file to create a
 [replication controller](../../docs/user-guide/replication-controller.md) that manages the worker pods.
 
-```sh
-$ kubectl create -f examples/spark/spark-worker-controller.json
+```console
+$ kubectl create -f examples/spark/spark-worker-controller.yaml
 ```
 
 ### Check to see if the workers are running
 
-```sh
-$ kubectl get pods
-NAME                            READY     STATUS    RESTARTS   AGE
-[...]
-spark-master                    1/1       Running   0          14m
-spark-worker-controller-hifwi   1/1       Running   0          33s
-spark-worker-controller-u40r2   1/1       Running   0          33s
-spark-worker-controller-vpgyg   1/1       Running   0          33s
+If you launched the Spark WebUI, your workers should just appear in the UI when
+they're ready. (It may take a little bit to pull the images and launch the
+pods.) You can also interrogate the status in the following way:
 
-$ kubectl logs spark-master
+```console
+$ kubectl get pods
+NAME                            READY     STATUS    RESTARTS   AGE
+spark-master-controller-5u0q5   1/1       Running   0          25m
+spark-worker-controller-e8otp   1/1       Running   0          6m
+spark-worker-controller-fiivl   1/1       Running   0          6m
+spark-worker-controller-ytc7o   1/1       Running   0          6m
+
+$ kubectl logs spark-master-controller-5u0q5
 [...]
-15/06/26 14:15:43 INFO Master: Registering worker 10.244.2.35:46199 with 1 cores, 2.6 GB RAM
-15/06/26 14:15:55 INFO Master: Registering worker 10.244.1.15:44839 with 1 cores, 2.6 GB RAM
-15/06/26 14:15:55 INFO Master: Registering worker 10.244.0.19:60970 with 1 cores, 2.6 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.1.13:53567 with 2 cores, 6.3 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.2.7:46195 with 2 cores, 6.3 GB RAM
+15/10/26 18:20:14 INFO Master: Registering worker 10.244.3.8:39926 with 2 cores, 6.3 GB RAM
 ```
 
 ## Step Three: Start your Spark driver to launch jobs on your Spark cluster
 
 The Spark driver is used to launch jobs into Spark cluster. You can read more about it in
-[Spark architecture](http://spark.apache.org/docs/latest/cluster-overview.html).
+[Spark architecture](https://spark.apache.org/docs/latest/cluster-overview.html).
 
-```shell
-$ kubectl create -f examples/spark/spark-driver.json
+```console
+$ kubectl create -f examples/spark/spark-driver-controller.yaml
+replicationcontrollers/spark-driver-controller
 ```
 
 The Spark driver needs the Master service to be running.
 
 ### Check to see if the driver is running
 
-```shell
-$ kubectl get pods
-NAME                            READY     REASON    RESTARTS   AGE
-[...]
-spark-master                    1/1       Running   0          14m
-spark-driver                    1/1       Running   0          10m
+```console
+$ kubectl get pods -lcomponent=spark-driver
+NAME                            READY     STATUS    RESTARTS   AGE
+spark-driver-controller-vwb9c   1/1       Running   0          1m
 ```
 
 ## Step Four: Do something with the cluster
 
-Use the kubectl exec to connect to Spark driver
+Use the kubectl exec to connect to Spark driver and run a pipeline.
 
-```
-$ kubectl exec spark-driver -it bash
-root@spark-driver:/#
-root@spark-driver:/# pyspark
+```console
+$ kubectl exec spark-driver-controller-vwb9c -it pyspark
 Python 2.7.9 (default, Mar  1 2015, 12:57:24)
 [GCC 4.9.2] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
-15/06/26 14:25:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
 Welcome to
       ____              __
      / __/__  ___ _____/ /__
     _\ \/ _ \/ _ `/ __/  '_/
-   /__ / .__/\_,_/_/ /_/\_\   version 1.4.0
+   /__ / .__/\_,_/_/ /_/\_\   version 1.5.1
       /_/
 
 Using Python version 2.7.9 (default, Mar  1 2015 12:57:24)
 SparkContext available as sc, HiveContext available as sqlContext.
->>> import socket
->>> sc.parallelize(range(1000)).map(lambda x:socket.gethostname()).distinct().collect()
-['spark-worker-controller-u40r2', 'spark-worker-controller-hifwi', 'spark-worker-controller-vpgyg']
+>>> sc.textFile("gs://dataflow-samples/shakespeare/*").map(lambda s: len(s.split())).sum()
+939193
 ```
 
+Congratulations, you just counted all of the words in all of the plays of
+Shakespeare.
+
 ## Result
 
-You now have services, replication controllers, and pods for the Spark master , Spark driver and Spark workers.
-You can take this example to the next step and start using the Apache Spark cluster
-you just created, see [Spark documentation](https://spark.apache.org/documentation.html)
-for more information.
+You now have services and replication controllers for the Spark master, Spark
+workers and Spark driver. You can take this example to the next step and start
+using the Apache Spark cluster you just created, see
+[Spark documentation](https://spark.apache.org/documentation.html) for more
+information.
 
 ## tl;dr
 
-```kubectl create -f spark-master.json```
+```console
+kubectl create -f examples/spark/spark-master-controller.yaml
+kubectl create -f examples/spark/spark-master-service.yaml
+kubectl create -f examples/spark/spark-webui.yaml
+kubectl create -f examples/spark/spark-worker-controller.yaml
+kubectl create -f examples/spark/spark-driver-controller.yaml
+```
 
-```kubectl create -f spark-master-service.json```
+After it's setup:
 
-Make sure the Master Pod is running (use: ```kubectl get pods```).
-
-```kubectl create -f spark-worker-controller.json```
-
-```kubectl create -f spark-driver.json```
+```console
+kubectl get pods # Make sure everything is running
+kubectl get services spark-webui # Get the IP of the Spark WebUI
+kubectl get pods -lcomponent=spark-driver # Get the driver pod to interact with.
+```
 
 <!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
 [![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/examples/spark/README.md?pixel)]()
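The worker replication controller used above starts with three replicas (see spark-worker-controller.yaml later in this diff). A hedged follow-on that the README does not cover: the pool can be resized in place with kubectl's scale subcommand.

```console
# Sketch: grow the Spark worker pool from 3 to 5 pods; the new workers should
# register with the master and show up in the WebUI.
$ kubectl scale rc spark-worker-controller --replicas=5
```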
@@ -0,0 +1,29 @@
+all: push
+
+# To bump the Spark version, bump the version in base/Dockerfile, bump
+# this tag and reset to v1. You should also double check the native
+# Hadoop libs at that point (we grab the 2.6.1 libs, which are
+# appropriate for 1.5.1-with-2.6).
+TAG = 1.5.1_v1
+
+containers:
+	docker build -t gcr.io/google_containers/spark-base base
+	docker tag gcr.io/google_containers/spark-base gcr.io/google_containers/spark-base:$(TAG)
+	docker build -t gcr.io/google_containers/spark-worker worker
+	docker tag gcr.io/google_containers/spark-worker gcr.io/google_containers/spark-worker:$(TAG)
+	docker build -t gcr.io/google_containers/spark-master master
+	docker tag gcr.io/google_containers/spark-master gcr.io/google_containers/spark-master:$(TAG)
+	docker build -t gcr.io/google_containers/spark-driver driver
+	docker tag gcr.io/google_containers/spark-driver gcr.io/google_containers/spark-driver:$(TAG)
+
+push: containers
+	gcloud docker push gcr.io/google_containers/spark-base
+	gcloud docker push gcr.io/google_containers/spark-base:$(TAG)
+	gcloud docker push gcr.io/google_containers/spark-worker
+	gcloud docker push gcr.io/google_containers/spark-worker:$(TAG)
+	gcloud docker push gcr.io/google_containers/spark-master
+	gcloud docker push gcr.io/google_containers/spark-master:$(TAG)
+	gcloud docker push gcr.io/google_containers/spark-driver
+	gcloud docker push gcr.io/google_containers/spark-driver:$(TAG)
+
+clean:
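A brief usage sketch for the new Makefile, assuming Docker and an authenticated gcloud CLI with push access to the gcr.io/google_containers registry (in practice you would substitute your own registry):

```console
# Build spark-base, spark-worker, spark-master and spark-driver locally,
# tagging each with $(TAG).
$ make containers

# Build if needed, then push both the floating and the 1.5.1_v1 tags.
$ make push
```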
@@ -3,15 +3,35 @@ FROM java:latest
 RUN apt-get update -y
 RUN apt-get install -y scala
 
-# Get Spark from some apache mirror.
+ENV hadoop_ver 2.6.1
+ENV spark_ver 1.5.1
+
+# Get Hadoop from US Apache mirror and extract just the native
+# libs. (Until we care about running HDFS with these containers, this
+# is all we need.)
 RUN mkdir -p /opt && \
     cd /opt && \
-    wget http://apache.mirrors.pair.com/spark/spark-1.4.0/spark-1.4.0-bin-hadoop2.6.tgz && \
-    tar -zvxf spark-1.4.0-bin-hadoop2.6.tgz && \
-    rm spark-1.4.0-bin-hadoop2.6.tgz && \
-    ln -s spark-1.4.0-bin-hadoop2.6 spark && \
-    echo Spark installed in /opt
+    wget http://www.us.apache.org/dist/hadoop/common/hadoop-${hadoop_ver}/hadoop-${hadoop_ver}.tar.gz && \
+    tar -zvxf hadoop-${hadoop_ver}.tar.gz hadoop-${hadoop_ver}/lib/native && \
+    rm hadoop-${hadoop_ver}.tar.gz && \
+    ln -s hadoop-${hadoop_ver} hadoop && \
+    echo Hadoop ${hadoop_ver} native libraries installed in /opt/hadoop/lib/native
 
+# Get Spark from US Apache mirror.
+RUN mkdir -p /opt && \
+    cd /opt && \
+    wget http://www.us.apache.org/dist/spark/spark-${spark_ver}/spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    tar -zvxf spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    rm spark-${spark_ver}-bin-hadoop2.6.tgz && \
+    ln -s spark-${spark_ver}-bin-hadoop2.6 spark && \
+    echo Spark ${spark_ver} installed in /opt
+
+# Add the GCS connector.
+RUN wget -O /opt/spark/lib/gcs-connector-latest-hadoop2.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-latest-hadoop2.jar
+
 ADD log4j.properties /opt/spark/conf/log4j.properties
 ADD setup_client.sh /
+ADD start-common.sh /
+ADD core-site.xml /opt/spark/conf/core-site.xml
+ADD spark-defaults.conf /opt/spark/conf/spark-defaults.conf
 ENV PATH $PATH:/opt/spark/bin

@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+  <property>
+    <name>fs.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem</value>
+    <description>The FileSystem for gs: (GCS) uris.</description>
+  </property>
+  <property>
+    <name>fs.AbstractFileSystem.gs.impl</name>
+    <value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
+    <description>The AbstractFileSystem for gs: (GCS) uris. Only necessary for use with Hadoop 2.</description>
+  </property>
+  <property>
+    <name>fs.gs.project.id</name>
+    <value>NOT_RUNNING_INSIDE_GCE</value>
+  </property>
+</configuration>

@@ -0,0 +1,5 @@
+spark.master                    spark://spark-master:7077
+spark.executor.extraClassPath   /opt/spark/lib/gcs-connector-latest-hadoop2.jar
+spark.driver.extraClassPath     /opt/spark/lib/gcs-connector-latest-hadoop2.jar
+spark.driver.extraLibraryPath   /opt/hadoop/lib/native
+spark.app.id                    KubernetesSpark
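These defaults are baked into the base image via the ADD spark-defaults.conf line in the Dockerfile above, so every master, worker, and driver container shares them. A hedged way to confirm they landed in a running pod; <spark-driver-pod> is a placeholder for a real pod name:

```console
# Sketch: print the effective defaults from inside a running pod.
$ kubectl exec <spark-driver-pod> -- cat /opt/spark/conf/spark-defaults.conf
```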
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT_ID=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
+
+if [[ -n "${PROJECT_ID}" ]]; then
+  sed -i "s/NOT_RUNNING_INSIDE_GCE/${PROJECT_ID}/" /opt/spark/conf/core-site.xml
+fi
+
+# We don't want any of the incoming service variables, we'd rather use
+# DNS. But this one interferes directly with Spark.
+unset SPARK_MASTER_PORT
+
+# spark.{executor,driver}.extraLibraryPath don't actually seem to
+# work, this seems to be the only reliable way to get the native libs
+# picked up.
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/hadoop/lib/native
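start-common.sh only rewrites fs.gs.project.id when the GCE metadata server answers; off GCE the curl fails and the NOT_RUNNING_INSIDE_GCE placeholder stays in core-site.xml. A rough sanity check, run by hand, would be the same query the script performs (the output shown is hypothetical):

```console
# Sketch: the metadata lookup start-common.sh relies on.
$ curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id
my-gcp-project   # hypothetical; empty or an error when not on GCE
```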
@@ -1,3 +1,3 @@
-FROM gcr.io/google_containers/spark-base:1.4.0_v1
+FROM gcr.io/google_containers/spark-base:latest
 ADD start.sh /start.sh
 CMD ["/start.sh"]

@@ -14,9 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-echo "$SPARK_MASTER_SERVICE_HOST spark-master" >> /etc/hosts
-echo "SPARK_LOCAL_HOSTNAME=$(hostname -i)" >> /opt/spark/conf/spark-env.sh
-echo "MASTER=spark://spark-master:$SPARK_MASTER_SERVICE_PORT" >> /opt/spark/conf/spark-env.sh
+. /start-common.sh
 echo "Use kubectl exec spark-driver -it bash to invoke commands"
 while true; do
   sleep 100

@@ -1,7 +1,7 @@
-FROM gcr.io/google_containers/spark-base:1.4.0_v1
+FROM gcr.io/google_containers/spark-base:latest
 
 ADD start.sh /
 ADD log4j.properties /opt/spark/conf/log4j.properties
-EXPOSE 7077
+EXPOSE 7077 8080
 
 ENTRYPOINT ["/start.sh"]

@@ -14,6 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-export SPARK_MASTER_PORT=${SPARK_MASTER_SERVICE_PORT:-7077}
+. /start-common.sh
+
+echo "$(hostname -i) spark-master" >> /etc/hosts
+export SPARK_LOCAL_HOSTNAME=spark-master
+export SPARK_MASTER_IP=spark-master
+
 /opt/spark/sbin/start-master.sh
 tail -F /opt/spark/logs/*

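With the master image now exposing 7077 and 8080 and pinning its hostname to spark-master in start.sh, a hedged local smoke test (not documented by the commit) is to run the image directly under Docker before deploying it to the cluster:

```console
# Sketch: run the master standalone; the WebUI should answer on localhost:8080.
$ docker run -d --name spark-master -p 7077:7077 -p 8080:8080 \
    gcr.io/google_containers/spark-master:1.5.1_v1
$ docker logs -f spark-master
```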
@@ -1,7 +1,7 @@
-FROM gcr.io/google_containers/spark-base:1.4.0_v1
+FROM gcr.io/google_containers/spark-base:latest
 
 ADD start.sh /
 ADD log4j.properties /opt/spark/conf/log4j.properties
 
 EXPOSE 8080
 
 ENTRYPOINT ["/start.sh"]

@@ -14,15 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-if [[ ${SPARK_MASTER_SERVICE_HOST} == "" ]]; then
-  echo "Spark Master service must be created before starting any workers"
-  sleep 30 # To postpone pod restart
-  exit 1
-fi
+. /start-common.sh
 
-echo "${SPARK_MASTER_SERVICE_HOST} spark-master" >> /etc/hosts
-export SPARK_LOCAL_HOSTNAME=$(hostname -i)
-
-/opt/spark/sbin/start-slave.sh spark://spark-master:${SPARK_MASTER_SERVICE_PORT}
+/opt/spark/sbin/start-slave.sh spark://spark-master:7077
 
 tail -F /opt/spark/logs/*

@@ -0,0 +1,21 @@
+kind: ReplicationController
+apiVersion: v1
+metadata:
+  name: spark-driver-controller
+  labels:
+    component: spark-driver
+spec:
+  replicas: 1
+  selector:
+    component: spark-driver
+  template:
+    metadata:
+      labels:
+        component: spark-driver
+    spec:
+      containers:
+        - name: spark-driver
+          image: gcr.io/google_containers/spark-driver:1.5.1_v1
+          resources:
+            requests:
+              cpu: 100m

@@ -1,23 +0,0 @@
-{
-  "kind": "Pod",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-driver",
-    "labels": {
-      "name": "spark-driver"
-    }
-  },
-  "spec": {
-    "containers": [
-      {
-        "name": "spark-driver",
-        "image": "gcr.io/google_containers/spark-driver:1.4.0_v1",
-        "resources": {
-          "limits": {
-            "cpu": "100m"
-          }
-        }
-      }
-    ]
-  }
-}

@@ -3,7 +3,7 @@ apiVersion: v1
 metadata:
   name: spark-master-controller
   labels:
-    component: spark-master-controller
+    component: spark-master
 spec:
   replicas: 1
   selector:
@@ -15,14 +15,14 @@ spec:
     spec:
       containers:
         - name: spark-master
-          image: gcr.io/google_containers/spark-master
+          image: gcr.io/google_containers/spark-master:1.5.1_v1
           ports:
             - containerPort: 7077
           volumeMounts:
             - mountPath: /mnt/glusterfs
               name: glusterfsvol
           resources:
-            limits:
+            requests:
              cpu: 100m
       volumes:
         - name: glusterfsvol

@@ -9,4 +9,4 @@ spec:
   - port: 7077
     targetPort: 7077
   selector:
-    component: spark-master-controller
+    component: spark-master

@@ -12,18 +12,18 @@ spec:
     metadata:
       labels:
         component: spark-worker
-        uses: spark-master-controller
+        uses: spark-master
     spec:
       containers:
         - name: spark-worker
-          image: gcr.io/google_containers/spark-worker
+          image: gcr.io/google_containers/spark-worker:1.5.1_v1
           ports:
             - containerPort: 8888
           volumeMounts:
             - mountPath: /mnt/glusterfs
               name: glusterfsvol
           resources:
-            limits:
+            requests:
              cpu: 100m
       volumes:
         - name: glusterfsvol

@@ -0,0 +1,24 @@
+kind: ReplicationController
+apiVersion: v1
+metadata:
+  name: spark-master-controller
+  labels:
+    component: spark-master
+spec:
+  replicas: 1
+  selector:
+    component: spark-master
+  template:
+    metadata:
+      labels:
+        component: spark-master
+    spec:
+      containers:
+        - name: spark-master
+          image: gcr.io/google_containers/spark-master:1.5.1_v1
+          ports:
+            - containerPort: 7077
+            - containerPort: 8080
+          resources:
+            requests:
+              cpu: 100m

@@ -1,21 +0,0 @@
-{
-  "kind": "Service",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-master",
-    "labels": {
-      "name": "spark-master"
-    }
-  },
-  "spec": {
-    "ports": [
-      {
-        "port": 7077,
-        "targetPort": 7077
-      }
-    ],
-    "selector": {
-      "name": "spark-master"
-    }
-  }
-}

@@ -0,0 +1,12 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: spark-master
+  labels:
+    component: spark-master-service
+spec:
+  ports:
+    - port: 7077
+      targetPort: 7077
+  selector:
+    component: spark-master

@@ -1,28 +0,0 @@
-{
-  "kind": "Pod",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-master",
-    "labels": {
-      "name": "spark-master"
-    }
-  },
-  "spec": {
-    "containers": [
-      {
-        "name": "spark-master",
-        "image": "gcr.io/google_containers/spark-master:1.4.0_v1",
-        "ports": [
-          {
-            "containerPort": 7077
-          }
-        ],
-        "resources": {
-          "limits": {
-            "cpu": "100m"
-          }
-        }
-      }
-    ]
-  }
-}

@@ -0,0 +1,11 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: spark-webui
+spec:
+  ports:
+    - port: 8080
+      targetPort: 8080
+  selector:
+    component: spark-master
+  type: LoadBalancer
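spark-webui is a LoadBalancer service, which only gets an external IP on providers that support it. A hedged fallback for other clusters: the service is still assigned a NodePort (the 31141 in the README output above is only an example), reachable on any node's address; port-forwarding to the master pod is another option.

```console
# Sketch: find the allocated NodePort and browse to <any-node-ip>:<node-port>.
$ kubectl describe svc spark-webui | grep NodePort
NodePort:               <unnamed>   31141/TCP
```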
@@ -1,43 +0,0 @@
-{
-  "kind": "ReplicationController",
-  "apiVersion": "v1",
-  "metadata": {
-    "name": "spark-worker-controller",
-    "labels": {
-      "name": "spark-worker"
-    }
-  },
-  "spec": {
-    "replicas": 3,
-    "selector": {
-      "name": "spark-worker"
-    },
-    "template": {
-      "metadata": {
-        "labels": {
-          "name": "spark-worker",
-          "uses": "spark-master"
-        }
-      },
-      "spec": {
-        "containers": [
-          {
-            "name": "spark-worker",
-            "image": "gcr.io/google_containers/spark-worker:1.4.0_v1",
-            "ports": [
-              {
-                "hostPort": 8888,
-                "containerPort": 8888
-              }
-            ],
-            "resources": {
-              "limits": {
-                "cpu": "100m"
-              }
-            }
-          }
-        ]
-      }
-    }
-  }
-}

|
@ -0,0 +1,24 @@
|
||||||
|
kind: ReplicationController
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: spark-worker-controller
|
||||||
|
labels:
|
||||||
|
component: spark-worker
|
||||||
|
spec:
|
||||||
|
replicas: 3
|
||||||
|
selector:
|
||||||
|
component: spark-worker
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
component: spark-worker
|
||||||
|
uses: spark-master
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: spark-worker
|
||||||
|
image: gcr.io/google_containers/spark-worker:1.5.1_v1
|
||||||
|
ports:
|
||||||
|
- containerPort: 8888
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|