(1) Download the installation package and unpack it into the target directory
(2) Edit the configuration files and add the required variables
(3) Distribute the installation to the other nodes and configure the environment variables
Taking the dataxc user as an example, a sample script is shown below:
#!/bin/bash
# hadoop.sh
nodes=(n101 n102 n103)
# cluster addresses
zk_connect='n101:2181,n102:2181,n103:2181'
hdns='n101:8485;n102:8485;n103:8485'
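# note: the ZooKeeper quorum is comma-separated, while the JournalNode list uses ';' because it is later embedded in a qjournal:// URI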
# extract hadoop into the program directory
cd /home/dataxc/sw && tar -zxvf hadoop-3.3.1.tar.gz -C /home/dataxc/opt
# set the Java path and timezone for hadoop
sed -i 's!# export JAVA_HOME=!export JAVA_HOME=/home/dataxc/opt/jdk1.8.0_301!' /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
echo -e 'export HADOOP_OPTS="$HADOOP_OPTS -Duser.timezone='Asia/Shanghai'"' >> /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
# configure core-site.xml
sed -i '/^<configuration>/,$d' /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/core-site.xml
echo -e "\
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hdcluster/</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/dataxc/opt/hadoop-3.3.1/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>$zk_connect</value>
</property>
</configuration>" >> /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/core-site.xml
# configure hdfs-site.xml
sed -i '/^<configuration>/,$d' /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/hdfs-site.xml
echo -e "\
<configuration>
<property>
<name>dfs.nameservices</name>
<value>hdcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.hdcluster</name>
<value>nn1,nn2</value>
</property>\n
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn1</name>
<value>n101:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.hdcluster.nn1</name>
<value>n101:50070</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.hdcluster.nn1</name>
<value>n101:53310</value>
</property>\n
<property>
<name>dfs.namenode.rpc-address.hdcluster.nn2</name>
<value>n102:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.hdcluster.nn2</name>
<value>n102:50070</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.hdcluster.nn2</name>
<value>n102:53310</value>
</property>\n
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://$hdns/hdcluster</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/dataxc/opt/hadoop-3.3.1/data/hdcluster</value>
<final>true</final>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/dataxc/opt/hadoop-3.3.1/data/journal</value>
</property>\n
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.hdcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>\n
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/dataxc/.ssh/id_rsa</value>
</property>\n
<property>
<name>dfs.replication</name>
<value>3</value>
</property>\n
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>" >> /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/hdfs-site.xml
# configure yarn-site.xml
sed -i '/^<configuration>/,$d' /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/yarn-site.xml
echo -e "\
<configuration>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm_ha_id</value>
</property>\n
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>n101</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>n102</value>
</property>\n
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>\n
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>$zk_connect</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>\n
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>-1</value>
</property>
<property>
<name>yarn.nodemanager.resource.detect-hardware-capabilities</name>
<value>true</value>
</property>
</configuration>" >> /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/yarn-site.xml
# configure mapred-site.xml
sed -i '/^<configuration>/,$d' /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/mapred-site.xml
echo -e "\
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.tasktracker.outofband.heartbeat</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>" >> /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/mapred-site.xml
# add user variables to the start/stop scripts
sed -i '1a export HDFS_ZKFC_USER=dataxc\
export HDFS_JOURNALNODE_USER=dataxc\
export HDFS_SECONDARYNAMENODE_USER=dataxc\
export HDFS_NAMENODE_USER=dataxc\
export HDFS_DATANODE_SECURE_USER=hdfs\
export HDFS_DATANODE_USER=dataxc' /home/dataxc/opt/hadoop-3.3.1/sbin/{start-dfs.sh,stop-dfs.sh}
sed -i '1a export YARN_PROXYSERVER_USER=dataxc\
export YARN_NODEMANAGER_USER=dataxc\
export HADOOP_SECURE_DN_USER=yarn\
export YARN_RESOURCEMANAGER_USER=dataxc' /home/dataxc/opt/hadoop-3.3.1/sbin/{start-yarn.sh,stop-yarn.sh}
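# the *_USER variables declare which account each daemon runs as; Hadoop 3's start/stop scripts refuse to operate a daemon when the invoking user does not match (here everything runs as dataxc)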
# add the cluster nodes to the workers file
echo -e "n101\nn102\nn103" > /home/dataxc/opt/hadoop-3.3.1/etc/hadoop/workers
# distribute to the other nodes
for node in ${nodes[*]:1}
do
scp -r /home/dataxc/opt/hadoop-3.3.1 dataxc@$node:/home/dataxc/opt
done
# add environment variables
for node in ${nodes[*]}
do
ssh dataxc@$node 'sed -i -e "/export JAVA_HOME=/a export HADOOP_HOME=/home/dataxc/opt/hadoop-3.3.1" \
-e "/^export PATH=/ s/$/\:\$HADOOP_HOME\/bin\:\$HADOOP_HOME\/sbin/" /home/dataxc/.bashrc;
source /home/dataxc/.bashrc'
done
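# (optional sanity check, not part of the original flow) confirm each node received the distribution
for node in ${nodes[*]}
do
ssh dataxc@$node /home/dataxc/opt/hadoop-3.3.1/bin/hadoop version | head -n 1
done
# the first-time startup commands below are wrapped in a ':<<! ... !' here-document so this script does not execute them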
:<<!
# start journalnode on all nodes (if the ZooKeeper cluster is not running, start it first)
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs --workers --daemon start journalnode
# first startup: format the NameNode
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs namenode -format
# start the NameNode
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs --daemon start namenode
# sync metadata to the standby NameNode
ssh dataxc@n102 /home/dataxc/opt/hadoop-3.3.1/bin/hdfs namenode -bootstrapStandby
# first startup: format the HA state in ZooKeeper
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs zkfc -formatZK
# start hdfs
/home/dataxc/opt/hadoop-3.3.1/sbin/start-dfs.sh
# start yarn
/home/dataxc/opt/hadoop-3.3.1/sbin/start-yarn.sh
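# (optional check, assuming the nn1/nn2 and rm1 ids configured above) query the HA state of the NameNodes and ResourceManager
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs haadmin -getServiceState nn1
/home/dataxc/opt/hadoop-3.3.1/bin/hdfs haadmin -getServiceState nn2
/home/dataxc/opt/hadoop-3.3.1/bin/yarn rmadmin -getServiceState rm1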
!
#end