Spark Cluster Installation and Deployment
1. Download the Spark package from the official archive
# wget https://archive.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz
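Optionally, verify the download against the .sha512 checksum published in the same archive directory (older Apache checksum files are not always in sha512sum -c format, so you may need to compare the hashes by eye):
# wget https://archive.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz.sha512
# sha512sum spark-2.3.1-bin-hadoop2.7.tgz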
2. Extract the archive
# tar -zxvf spark-2.3.1-bin-hadoop2.7.tgz -C /home/hadoop/app
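Optionally, a version-neutral symlink makes later upgrades easier (a convenience only; the rest of this guide uses the full directory name):
# ln -s /home/hadoop/app/spark-2.3.1-bin-hadoop2.7 /home/hadoop/app/spark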
3. Modify the configuration
# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/conf/
# cp spark-env.sh.template spark-env.sh
# cp slaves.template slaves
# cp spark-defaults.conf.template spark-defaults.conf
Edit spark-env.sh
# vim spark-env.sh
Add:
export HADOOP_CONF_DIR=/home/hadoop/app/hadoop-2.7.5/etc/hadoop
export HADOOP_HOME=/home/hadoop/app/hadoop-2.7.5
export JAVA_HOME=/opt/jdk1.8.0_202
export SPARK_HOME=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7
export SCALA_HOME=/home/hadoop/app/scala-2.11.8
export SPARK_LOG_DIR=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7/logs
export SPARK_PID_DIR=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7/logs/pid
Edit spark-defaults.conf
# vim spark-defaults.conf
Add:
spark.eventLog.enabled true
spark.eventLog.dir hdfs://ns1/spark/eventLog
spark.rdd.compress true
spark.driver.memory 4G
spark.yarn.historyServer.address dba-01:18080
spark.history.ui.port 18080
spark.history.fs.logDirectory hdfs://ns1/spark/eventLog
spark.yarn.maxAppAttempts 4
spark.yarn.stagingDir hdfs://ns1/spark/stagingDir
spark.yarn.singleContainerPerNode false
spark.yarn.allocator.waitTime 60s
spark.logConf true
spark.ui.killEnabled false
spark.streaming.backpressure.initialRate 1000
spark.streaming.kafka.maxRatePerPartition 10000
spark.streaming.blockInterval 1000
spark.streaming.backpressure.enabled true
spark.streaming.receiver.maxRate 10000
spark.streaming.kafka.maxRetries 10
spark.default.parallelism 64
spark.streaming.dynamicAllocation.enabled false
spark.streaming.dynamicAllocation.minExecutors 1
spark.streaming.dynamicAllocation.maxExecutors 50
spark.shuffle.service.enabled true
spark.dynamicAllocation.enabled true
spark.dynamicAllocation.minExecutors 1
spark.dynamicAllocation.maxExecutors 20
spark.driver.maxResultSize 4g
Edit slaves
# vim slaves
Add:
dba-01
dba-02
dba-03
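Note: because spark.shuffle.service.enabled and spark.dynamicAllocation.enabled are both true and the defaults above target YARN, every NodeManager must also run Spark's external shuffle service, or dynamically allocated executors will fail to register. A minimal yarn-site.xml sketch (property names and the service class come from the Spark-on-YARN docs; the file paths assume the layout used in this guide):
# vim /home/hadoop/app/hadoop-2.7.5/etc/hadoop/yarn-site.xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle,spark_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>
Then copy /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/yarn/spark-2.3.1-yarn-shuffle.jar into the NodeManager classpath (e.g. /home/hadoop/app/hadoop-2.7.5/share/hadoop/yarn/lib/) on every node and restart the NodeManagers.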
4. Create the directories
# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7
# mkdir -p logs/pid
# hdfs dfs -mkdir -p /spark/stagingDir
# hdfs dfs -mkdir -p /spark/eventLog
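Since spark-defaults.conf enables event logging to hdfs://ns1/spark/eventLog, start the history server once that directory exists; run it on dba-01 so it matches spark.yarn.historyServer.address:
# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7
# ./sbin/start-history-server.sh
It should then be reachable at http://dba-01:18080.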
5. Copy the installation to the other nodes
# cd /home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-02:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-03:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-04:/home/hadoop/app
# scp -r spark-2.3.1-bin-hadoop2.7 hadoop@dba-05:/home/hadoop/app
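Equivalently, a short loop avoids the repetition (assumes passwordless SSH is already configured for the hadoop user):
# cd /home/hadoop/app
# for node in dba-02 dba-03 dba-04 dba-05; do scp -r spark-2.3.1-bin-hadoop2.7 hadoop@$node:/home/hadoop/app; done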
6. Start the Spark cluster from any node
# cd /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/sbin
# ./start-all.sh
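To confirm the cluster is up, check the daemons with jps: the node where start-all.sh was run should show a Master process, and each host listed in slaves (dba-01, dba-02, dba-03) should show a Worker:
# jps
The standalone master web UI is then available on port 8080 of the master node.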
7. Add the Spark environment variables
# vim /etc/profile
export SPARK_HOME=/home/hadoop/app/spark-2.3.1-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin
# source /etc/profile
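As a final smoke test, submit the bundled SparkPi example (the jar name matches the Spark 2.3.1/Scala 2.11 distribution; --master yarn assumes the Hadoop cluster is running, or substitute the standalone URL spark://<master-host>:7077):
# spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster /home/hadoop/app/spark-2.3.1-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.3.1.jar 100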