[SPARK] Install & Cluster Setup

Installing Spark

wget https://mirror.navercorp.com/apache/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz

tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz
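
The SPARK_HOME set below points at /usr/local, so move the extracted directory there first (a small sketch, assuming the tarball was extracted in the current directory):

sudo mv spark-3.1.2-bin-hadoop3.2 /usr/local/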

sudo vi /etc/profile

export SPARK_HOME=/usr/local/spark-3.1.2-bin-hadoop3.2
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin
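
Reload the profile so SPARK_HOME takes effect in the current shell before the next step:

source /etc/profile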

cd $SPARK_HOME/conf

cp workers.template workers

vi workers

spark-hsjang--1
spark-hsjang--2
spark-hsjang--3
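
Each hostname listed in workers must resolve from the master, and the sbin start scripts connect to every worker over SSH, so passwordless SSH from the master to each worker is assumed. If you are not using DNS, a sketch of the /etc/hosts entries (the IP addresses are placeholders for illustration):

10.0.0.1  spark-hsjang--1
10.0.0.2  spark-hsjang--2
10.0.0.3  spark-hsjang--3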

cp spark-env.sh.template spark-env.sh

vi spark-env.sh

export SPARK_MASTER_HOST=spark-hsjang--1
export SPARK_WORKER_CORES=4
export SPARK_WORKER_MEMORY=3g
export SPARK_WORKER_INSTANCES=1
export PYSPARK_PYTHON=/usr/bin/python  ## set this if you want to change the Python version
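
The same configuration has to exist on every node. A minimal sketch of distributing the conf files and starting the standalone cluster, assuming Spark is installed at the same path on all three nodes:

scp $SPARK_HOME/conf/workers $SPARK_HOME/conf/spark-env.sh spark-hsjang--2:$SPARK_HOME/conf/
scp $SPARK_HOME/conf/workers $SPARK_HOME/conf/spark-env.sh spark-hsjang--3:$SPARK_HOME/conf/

$SPARK_HOME/sbin/start-all.sh

After starting, the master Web UI at http://spark-hsjang--1:8080 should list all three workers.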

Integrating with Hadoop

sudo vi $SPARK_HOME/conf/spark-env.sh

export HADOOP_HOME=/usr/local/hadoop-3.3.1
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
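
With HADOOP_CONF_DIR set, spark-submit picks up the HDFS and YARN addresses from the Hadoop config files, so HDFS and YARN should be running before submitting jobs. A quick check, assuming the Hadoop sbin scripts are on PATH as configured in /etc/profile above:

start-dfs.sh
start-yarn.sh
jps   # expect NameNode/DataNode and ResourceManager/NodeManager processes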

mv $SPARK_HOME/conf/spark-defaults.conf.template $SPARK_HOME/conf/spark-defaults.conf

# Add the following setting
spark.master                     yarn
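
With spark.master set to yarn, a quick way to verify the integration is the bundled SparkPi example (the examples jar name below matches the Spark 3.1.2 / Scala 2.12 binary distribution; adjust it if yours differs):

spark-submit --master yarn --deploy-mode cluster \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.12-3.1.2.jar 100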