1. 程式人生 > 其它 >spark叢集安裝部署

spark叢集安裝部署

1.在官網下載spark安裝包

# wget https://archive.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz

2.解壓

# tar -zxvf spark-2.3.1-bin-hadoop2.7.tgz -C /home/hadoop/app

3.修改配置

# cd  /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/conf/
# cp spark-env.sh.template spark-env.sh
# cp slaves.template slaves
# cp spark-defaults.conf.template spark-defaults.conf
# vim spark-env.sh

新增
export HADOOP_CONF_DIR=/home/hadoop/app/hadoop-2.7.5/etc/hadoop
export HADOOP_HOME=/home/hadoop/app/hadoop-2.7.5
export JAVA_HOME=/opt/jdk1.8.0_202
export SPARK_HOME=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7
export SCALA_HOME=/home/hadoop/app/scala-2.11.8
export SPARK_LOG_DIR=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7/logs
export SPARK_PID_DIR=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7/logs/pid

修改spark-defaults.conf
# vim spark-defaults.conf

新增
spark.eventLog.enabled                             true
spark.eventLog.dir                                 hdfs://ns1/spark/eventLog
spark.rdd.compress                                 true
spark.driver.memory                                4G
spark.yarn.historyServer.address                   dba-01:18080
spark.history.ui.port                              18080
spark.history.fs.logDirectory                      hdfs://ns1/spark/eventLog
spark.yarn.maxAppAttempts                          4
spark.yarn.stagingDir                              hdfs://ns1/spark/stagingDir

spark.yarn.singleContainerPerNode                  false
spark.yarn.allocator.waitTime                      60s
spark.logConf                                      true
spark.ui.killEnabled                               false
spark.streaming.backpressure.initialRate           1000
spark.streaming.kafka.maxRatePerPartition         10000
spark.streaming.blockInterval                     1000
spark.streaming.backpressure.enabled              true
spark.streaming.receiver.maxRate                  10000
spark.streaming.kafka.maxRetries                  10
spark.default.parallelism                         64
spark.streaming.dynamicAllocation.enabled         false
spark.streaming.dynamicAllocation.minExecutors    1
spark.streaming.dynamicAllocation.maxExecutors    50
spark.shuffle.service.enabled             true
spark.dynamicAllocation.enabled           true
spark.dynamicAllocation.minExecutors      1
spark.dynamicAllocation.maxExecutors      20
spark.driver.maxResultSize  4g

修改slaves
# vim slaves
新增
dba-01
dba-02
dba-03

4.建立目錄

# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7
# mkdir -p logs/pid
# hdfs dfs -mkdir -p /spark/stagingDir
# hdfs dfs -mkdir -p /spark/eventLog

5.傳輸到其他節點

# cd /home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-02:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-03:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-04:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-05:/home/hadoop/app

6.任意一個節點啟動spark叢集

# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/sbin
# ./start-all.sh

7.新增spark環境變數

# vim /etc/profile
export SPARK_HOME=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin

# source /etc/profile