Before You Install#
Storage Space Planning for Cloudera Manager#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
| # 1. Set the hostname to a unique name (not localhost).
# Give every node its own FQDN as hostname; each host is addressed on its own
# because the value differs per machine.
for short_name in namenode-01 namenode-02 datanode-01 datanode-02 datanode-03; do
  fqdn="${short_name}.example.com"
  ansible "${fqdn}" -m ansible.builtin.command -a "hostnamectl set-hostname ${fqdn}"
done
# Read the hostname back from the whole group as a check.
ansible hadoops -m ansible.builtin.command -a "hostname"
# 2. Edit /etc/hosts with the IP address and fully qualified domain name (FQDN) of each host in the cluster. You can add the unqualified name as well.
# The table is staged in a local scratch file, pushed to all hosts, then removed.
cat > hosts <<EOF
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.2.201 namenode-01.example.com namenode-01
192.168.2.202 namenode-02.example.com namenode-02
192.168.2.211 datanode-01.example.com datanode-01
192.168.2.212 datanode-02.example.com datanode-02
192.168.2.213 datanode-03.example.com datanode-03
EOF
ansible hadoops -m ansible.builtin.copy -a "src=./hosts dest=/etc/hosts owner=root group=root mode=0644"
# Show the deployed file's content and its ownership/permissions.
for check_cmd in "cat /etc/hosts" "ls -l /etc/hosts"; do
  ansible hadoops -m ansible.builtin.command -a "${check_cmd}"
done
rm hosts
# 3. Edit /etc/sysconfig/network with the FQDN of this host only
# Single quotes stop the local shell from expanding $HOSTNAME, so it is
# evaluated on each managed host. The first command truncates the file, the
# second appends to it.
ansible hadoops -m ansible.builtin.shell -a 'echo "# Created by anaconda" > /etc/sysconfig/network'
ansible hadoops -m ansible.builtin.shell -a 'echo "HOSTNAME=$HOSTNAME" >> /etc/sysconfig/network'
ansible hadoops -m ansible.builtin.command -a "cat /etc/sysconfig/network"
# 4. Verify that each host consistently identifies to the network
ansible hadoops -m ansible.builtin.command -a "uname -a"
# /sbin/ifconfig
# host -v -t A $(hostname)
### 5. Format the lv
# mkfs.xfs -f /dev/sdb
# NOTE(review): parted and fdisk are started without a script here; the
# `mklabel gpt`, `yes` and `quit` lines look like answers typed at the parted
# prompt rather than shell commands — confirm before running this as a script.
parted /dev/sdb
mklabel gpt
yes
quit
# List the partition tables, then open fdisk on /dev/sdb.
fdisk -l
fdisk /dev/sdb
# Build an XFS filesystem on /dev/sdb1 (-f forces overwriting an existing one).
mkfs.xfs -f /dev/sdb1
### 6. Create the mount point and mount it
mkdir /data
mount /dev/sdb1 /data
# Show the mounted filesystems with their types and human-readable sizes.
df -Th
### 7. Mount automatically at boot
# Append the entry to /etc/fstab, mount everything listed there, then print
# the file for review.
echo "/dev/sdb1 /data xfs defaults 0 0" >> /etc/fstab
mount -a
cat /etc/fstab
|
Disabling the Firewall#
1
2
3
| ansible hadoops -m ansible.builtin.command -a "systemctl disable firewalld"  # do not start firewalld at boot
# Stop the firewalld instance that is currently running.
ansible hadoops -m ansible.builtin.command -a "systemctl stop firewalld"
# Print the unit status on each host so the result can be checked by eye.
# NOTE(review): `systemctl status` exits non-zero for a stopped unit, so Ansible
# will likely report this task as failed even when firewalld is correctly off — confirm.
ansible hadoops -m ansible.builtin.command -a "systemctl status firewalld"
|
Setting SELinux mode#
1
2
3
4
5
| ansible hadoops -m ansible.builtin.command -a "getenforce"  # show the current SELinux mode
# Show the persistent SELinux configuration file.
ansible hadoops -m ansible.builtin.command -a "cat /etc/selinux/config"
# Rewrite SELINUX=enforcing to SELINUX=permissive in the config file; lines
# with any other value are left untouched by this pattern.
ansible hadoops -m ansible.builtin.shell -a "sed -i 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/selinux/config"
# Switch the running system to permissive mode.
ansible hadoops -m ansible.builtin.command -a "setenforce 0"
# Read the mode back to confirm the change.
ansible hadoops -m ansible.builtin.command -a "getenforce"
|
Enable an NTP Service#
1
| ansible hadoops -m ansible.builtin.command -a "date"
# NOTE(review): the command above only prints each host's current date/time so
# the clocks can be compared; nothing here installs or enables an NTP service.
|
系统参数调整#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
| # Configure swap behaviour (all nodes)
# Persist vm.swappiness in /etc/sysctl.conf, print the file, then reload it.
ansible hadoops -m ansible.builtin.shell -a 'echo "vm.swappiness = 0" >> /etc/sysctl.conf'
ansible hadoops -m ansible.builtin.command -a "cat /etc/sysctl.conf"
ansible hadoops -m ansible.builtin.shell -a 'sysctl -p'
# Set the per-user maximum number of open files, processes and locked memory (all nodes)
ansible hadoops -m ansible.builtin.shell -a 'echo "* soft nofile 32728" >> /etc/security/limits.conf'
ansible hadoops -m ansible.builtin.shell -a 'echo "* hard nofile 1024999" >> /etc/security/limits.conf'
ansible hadoops -m ansible.builtin.shell -a 'echo "* soft nproc 65535" >> /etc/security/limits.conf'
# The limits.conf item name is "nproc"; the previous spelling "noroc" is not a
# valid item, so the hard process limit was never applied.
ansible hadoops -m ansible.builtin.shell -a 'echo "* hard nproc unlimited" >> /etc/security/limits.conf'
ansible hadoops -m ansible.builtin.shell -a 'echo "* soft memlock unlimited" >> /etc/security/limits.conf'
ansible hadoops -m ansible.builtin.shell -a 'echo "* hard memlock unlimited" >> /etc/security/limits.conf'
ansible hadoops -m ansible.builtin.command -a "cat /etc/security/limits.conf"
# Disable transparent huge pages and their defragmentation (all nodes)
ansible hadoops -m ansible.builtin.shell -a 'echo never > /sys/kernel/mm/transparent_hugepage/enabled'
ansible hadoops -m ansible.builtin.shell -a 'echo never > /sys/kernel/mm/transparent_hugepage/defrag'
# Set this on every node, otherwise the installed cluster hosts show a warning
# (not needed for a database machine that runs on its own host).
# NOTE(review): this runtime value (10) differs from the 0 persisted in
# /etc/sysctl.conf above, so the value in effect changes after a reboot or
# another `sysctl -p` — confirm which one is intended.
ansible hadoops -m ansible.builtin.shell -a 'echo 10 > /proc/sys/vm/swappiness'
|
配置本地yum仓库#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| ansible jumper --become --become-method=sudo -m ansible.builtin.apt -a 'name=apache2 state=present update_cache=yes'
# The line above installs apache2 on the "jumper" host with privilege
# escalation; the flag is spelled --become (the former -become=yes is not a
# valid long option).
# Set up the Cloudera Manager repository
sudo mkdir -p /var/www/html/cloudera-repos/cm6
sudo tar xvfz /usr/local/src/cm6.3.1/cm6.3.1-redhat7.tar.gz -C /var/www/html/cloudera-repos/cm6 --strip-components=1
sudo cp -a /usr/local/src/cdh6.3.2/allkeys.asc /var/www/html/cloudera-repos/cm6/
# Write the repo file through `sudo tee`: with `sudo cat ... > file` the
# redirection is performed by the calling (unprivileged) shell and cannot
# write into the root-owned directory created above.
sudo tee /var/www/html/cloudera-repos/cm6/cloudera-manager.repo > /dev/null <<EOF
[cm6]
name=cloudera-manager
baseurl=http://192.168.1.99/cloudera-repos/cm6
gpgcheck=1
gpgkey=http://192.168.1.99/cloudera-repos/cm6/RPM-GPG-KEY-cloudera
EOF
# Deploy the offline parcel repository
sudo mkdir -p /var/www/html/cdh6_parcel
sudo mv /usr/local/src/cdh6.3.2/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel /var/www/html/cdh6_parcel/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel
# The .sha1 checksum file is renamed to .sha while it is moved.
# NOTE(review): presumably the name the parcel consumer expects — confirm.
sudo mv /usr/local/src/cdh6.3.2/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel.sha1 /var/www/html/cdh6_parcel/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel.sha
sudo mv /usr/local/src/cdh6.3.2/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel.sha256 /var/www/html/cdh6_parcel/CDH-6.3.2-1.cdh6.3.2.p0.1605554-el7.parcel.sha256
sudo mv /usr/local/src/cdh6.3.2/manifest.json /var/www/html/cdh6_parcel/manifest.json
# Restart apache2 and enable it at boot.
sudo systemctl restart apache2
sudo systemctl enable apache2
|
Installing Cloudera Manager, CDH, and Managed Services#
1
2
3
| ansible hadoops -m ansible.builtin.yum -a 'name=wget state=present'  # make sure wget is installed
# Download cloudera-manager.repo from 192.168.1.99 into /etc/yum.repos.d/ on every host.
ansible hadoops -m ansible.builtin.shell -a 'wget http://192.168.1.99/cloudera-repos/cm6/cloudera-manager.repo -P /etc/yum.repos.d/'
# Import the repository signing key from the same server into the RPM keyring.
ansible hadoops -m ansible.builtin.shell -a 'rpm --import http://192.168.1.99/cloudera-repos/cm6/RPM-GPG-KEY-cloudera'
|
Step 2: Install Java Development Kit#
1
2
3
4
5
6
7
8
| # # Manually Installing Oracle JDK
# tar xvfz /path/to/jdk-8u<update_version>-linux-x64.tar.gz -C /usr/java/
# Manually Installing OpenJDK
# (The three commented-out commands below would remove the OpenJDK 1.8
# packages from every host in the hadoops group.)
# ansible hadoops -m ansible.builtin.yum -a 'name=java-1.8.0-openjdk-devel state=absent'
# ansible hadoops -m ansible.builtin.yum -a 'name=java-1.8.0-openjdk state=absent'
# ansible hadoops -m ansible.builtin.yum -a 'name=java-1.8.0-openjdk-headless state=absent'
# Install the oracle-j2sdk1.8 package; only namenode-01 is targeted here.
ansible namenode-01.example.com -m ansible.builtin.yum -a 'name=oracle-j2sdk1.8 state=present'
|
Step 3: Install Cloudera Manager Server#
1
2
3
4
| # Install Cloudera Manager Packages
## 1. On the Cloudera Manager Server host, type the following commands to install the Cloudera Manager packages.
# namenode-01 is the only host targeted; the daemons package is installed
# before the server package.
ansible namenode-01.example.com -m ansible.builtin.yum -a 'name=cloudera-manager-daemons state=present'
ansible namenode-01.example.com -m ansible.builtin.yum -a 'name=cloudera-manager-server state=present'
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
| # Installing the MySQL Server
# Remove the mariadb-libs package from every host in the hadoops group.
ansible hadoops -m ansible.builtin.yum -a 'name=mariadb-libs state=absent'
# On namenode-01: download and install the MySQL 5.7 community release RPM
# (which provides the yum repository definition), then import the signing key.
ansible namenode-01.example.com -m ansible.builtin.shell -a 'wget http://repo.mysql.com/mysql57-community-release-el7.rpm'
ansible namenode-01.example.com -m ansible.builtin.shell -a 'rpm -ivh mysql57-community-release-el7.rpm'
ansible namenode-01.example.com -m ansible.builtin.shell -a 'rpm --import https://repo.mysql.com/RPM-GPG-KEY-mysql-2022'
# Update all installed packages non-interactively, then install and start MySQL.
ansible namenode-01.example.com -m ansible.builtin.shell -a 'yum update -y'
ansible namenode-01.example.com -m ansible.builtin.yum -a 'name=mysql-server state=present'
ansible namenode-01.example.com -m ansible.builtin.service -a "name=mysqld state=started"
# Configuring and Starting the MySQL Server
# Stop mysqld before replacing its configuration file.
ansible namenode-01.example.com -m ansible.builtin.service -a "name=mysqld state=stopped"
# Stage the new configuration in a local scratch file (./my.cnf); everything
# up to the EOF marker is written to that file verbatim.
cat <<EOF > my.cnf
[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
transaction-isolation = READ-COMMITTED
# Disabling symbolic-links is recommended to prevent assorted security risks;
# to do so, uncomment this line:
symbolic-links = 0
key_buffer_size = 32M
max_allowed_packet = 16M
thread_stack = 256K
thread_cache_size = 64
query_cache_limit = 8M
query_cache_size = 64M
query_cache_type = 1
max_connections = 550
#expire_logs_days = 10
#max_binlog_size = 100M
#log_bin should be on a disk with enough free space.
#Replace '/var/lib/mysql/mysql_binary_log' with an appropriate path for your
#system and chown the specified folder to the mysql user.
log_bin=/var/lib/mysql/mysql_binary_log
#In later versions of MySQL, if you enable the binary log and do not set
#a server_id, MySQL will not start. The server_id must be unique within
#the replicating group.
server_id=1
binlog_format = mixed
read_buffer_size = 2M
read_rnd_buffer_size = 16M
sort_buffer_size = 8M
join_buffer_size = 8M
# InnoDB settings
innodb_file_per_table = 1
innodb_flush_log_at_trx_commit = 2
innodb_log_buffer_size = 64M
innodb_buffer_pool_size = 4G
innodb_thread_concurrency = 8
innodb_flush_method = O_DIRECT
innodb_log_file_size = 512M
[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
sql_mode=STRICT_ALL_TABLES
EOF
# Push the staged file to /etc/my.cnf on namenode-01 (root:root, mode 0644),
# show the deployed content and permissions, then delete the local scratch copy.
ansible namenode-01.example.com -m ansible.builtin.copy -a "src=./my.cnf dest=/etc/my.cnf owner=root group=root mode=0644"
ansible namenode-01.example.com -m ansible.builtin.command -a "cat /etc/my.cnf"
ansible namenode-01.example.com -m ansible.builtin.command -a "ls -l /etc/my.cnf"
rm ./my.cnf
## Ensure the MySQL server starts at boot:
ansible namenode-01.example.com -m ansible.builtin.shell -a "systemctl enable mysqld"
## Start the MySQL server:
ansible namenode-01.example.com -m ansible.builtin.service -a "name=mysqld state=started"
## Run /usr/bin/mysql_secure_installation to set the MySQL root password and other security-related settings.
# Look up the temporary root password that the server logged to /var/log/mysqld.log.
grep 'temporary password' /var/log/mysqld.log
# NOTE(review): the next line looks like the temporary password found by the
# grep above, recorded as a note rather than a command — confirm.
5xFy3j*A_g6W
/usr/bin/mysql_secure_installation
# NOTE(review): presumably the account and the new root password entered during
# the run above — confirm, and consider keeping credentials out of this file.
root:QpChsiajrfrd^XwI3gF1
# Sample transcript of the interactive run. The here-document is redirected to
# /dev/null, so this statement produces no output and changes nothing.
cat <<EOF > /dev/null
Securing the MySQL server deployment.
Enter password for user root:
The existing password for the user account root has expired. Please set a new password.
New password:
Re-enter new password:
The 'validate_password' plugin is installed on the server.
The subsequent steps will run with the existing configuration
of the plugin.
Using existing password for root.
Estimated strength of the password: 100
Change the password for root ? ((Press y|Y for Yes, any other key for No) : Y
New password:
Re-enter new password:
Estimated strength of the password: 100
Do you wish to continue with the password provided?(Press y|Y for Yes, any other key for No) : Y
By default, a MySQL installation has an anonymous user,
allowing anyone to log into MySQL without having to have
a user account created for them. This is intended only for
testing, and to make the installation go a bit smoother.
You should remove them before moving into a production
environment.
Remove anonymous users? (Press y|Y for Yes, any other key for No) : Y
Success.
Normally, root should only be allowed to connect from
'localhost'. This ensures that someone cannot guess at
the root password from the network.
Disallow root login remotely? (Press y|Y for Yes, any other key for No) : N
... skipping.
By default, MySQL comes with a database named 'test' that
anyone can access. This is also intended only for testing,
and should be removed before moving into a production
environment.
Remove test database and access to it? (Press y|Y for Yes, any other key for No) : Y
- Dropping test database...
Success.
- Removing privileges on test database...
Success.
Reloading the privilege tables will ensure that all changes
made so far will take effect immediately.
Reload privilege tables now? (Press y|Y for Yes, any other key for No) : Y
Success.
All done!
EOF
## Installing the MySQL JDBC Driver
# Runs on every host in the hadoops group: download the Connector/J 5.1.49
# archive and unpack it in the working directory of the remote command.
ansible hadoops -m ansible.builtin.shell -a "wget https://downloads.mysql.com/archives/get/p/3/file/mysql-connector-java-5.1.49.tar.gz"
ansible hadoops -m ansible.builtin.shell -a "tar zxvf mysql-connector-java-5.1.49.tar.gz"
# Make sure /usr/share/java/ exists, then copy the driver jar there under the
# version-less name mysql-connector-java.jar.
ansible hadoops -m ansible.builtin.shell -a "mkdir -p /usr/share/java/"
ansible hadoops -m ansible.builtin.shell -a "cp mysql-connector-java-5.1.49/mysql-connector-java-5.1.49-bin.jar /usr/share/java/mysql-connector-java.jar"
## Creating Databases for Cloudera Software
# NOTE(review): the line below appears to record the MySQL root password typed
# at the `mysql -u root -p` prompt; it is not a command — confirm.
# The statements after `mysql -u root -p` are entered in the mysql client.
# Each CREATE DATABASE (utf8 / utf8_general_ci) is followed by a GRANT ALL on
# that database to a user at '%' with the password given in IDENTIFIED BY.
# The metastore database is granted to the user 'hive'; every other database is
# granted to a user with the same name as the database.
QpChsiajrfrd^XwI3gF1
mysql -u root -p
CREATE DATABASE scm DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON scm.* TO 'scm'@'%' IDENTIFIED BY 'scm^example2023';
CREATE DATABASE amon DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON amon.* TO 'amon'@'%' IDENTIFIED BY 'amon^example2023';
CREATE DATABASE rman DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON rman.* TO 'rman'@'%' IDENTIFIED BY 'rman^example2023';
CREATE DATABASE hue DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON hue.* TO 'hue'@'%' IDENTIFIED BY 'hue^example2023';
CREATE DATABASE metastore DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON metastore.* TO 'hive'@'%' IDENTIFIED BY 'hive^example2023';
CREATE DATABASE sentry DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON sentry.* TO 'sentry'@'%' IDENTIFIED BY 'sentry^example2023';
CREATE DATABASE nav DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON nav.* TO 'nav'@'%' IDENTIFIED BY 'nav^example2023';
CREATE DATABASE navms DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON navms.* TO 'navms'@'%' IDENTIFIED BY 'navms^example2023';
CREATE DATABASE oozie DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
GRANT ALL ON oozie.* TO 'oozie'@'%' IDENTIFIED BY 'oozie^example2023';
SHOW DATABASES;
SHOW GRANTS FOR 'oozie'@'%';
exit;
|
Step 5: Set up the Cloudera Manager Database#
1
2
3
4
| # Cloudera Manager Server
# Syntax for scm_prepare_database.sh
# The script is called with three arguments: `mysql`, `scm`, `scm`.
# NOTE(review): presumably database type, database name and database user, with
# the password entered at the script's prompt; the line after the command
# appears to record that password — confirm against the script's usage text.
/opt/cloudera/cm/schema/scm_prepare_database.sh mysql scm scm
scm^example2023
|
Step 6: Install CDH and Other Software#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| # 1. Start Cloudera Manager Server:
# rpcbind is started on every host in the hadoops group first.
ansible hadoops -m ansible.builtin.service -a "name=rpcbind state=started"
# Enable the server unit at boot, then start it (namenode-01 only).
ansible namenode-01.example.com -m ansible.builtin.shell -a "systemctl enable cloudera-scm-server"
ansible namenode-01.example.com -m ansible.builtin.service -a "name=cloudera-scm-server state=started"
# 2. Wait several minutes for the Cloudera Manager Server to start. To observe the startup process, run the following on the Cloudera Manager Server host:
# `tail -f` keeps following the log until it is interrupted.
tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
# 3. In a web browser, go to http://<server_host>:7180, where <server_host> is the FQDN or IP address of the host where the Cloudera Manager Server is running.
# 4. Log into Cloudera Manager Admin Console. The default credentials are:
# NOTE(review): the four lines below are notes (console URL and credentials),
# not shell commands.
http://namenode-01.example.com:7180/
Username: admin
Password: admin
密码修改为: example.com
|
集群名称:example
发现主机:namenode-[01-02].example.com,datanode-[01-03].example.com
自定义存储库:http://192.168.1.99/cloudera-repos/cm6/
自定义parcel库:http://192.168.1.99/cdh6_parcel/
root密码:example.com
kafka: whitelist: cloudera_mirrormaker
kafka broker lists:datanode-01.example.com:9092,datanode-02.example.com:9092,datanode-03.example.com:9092
修改策略:dfs.namenode.ec.system.default.policy
Step 7: Set Up a Cluster Using the Wizard#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
| # Uninstall
# Stop the agent on every host and the server on namenode-01 before removing
# any packages.
ansible hadoops -m ansible.builtin.service -a "name=cloudera-scm-agent state=stopped"
ansible namenode-01.example.com -m ansible.builtin.service -a "name=cloudera-scm-server state=stopped"
# Remove the JDK, agent, server and daemons packages, and delete the
# configuration and state directories they leave behind.
ansible hadoops -m ansible.builtin.yum -a 'name=oracle-j2sdk1.8 state=absent'
ansible hadoops -m ansible.builtin.yum -a 'name=cloudera-manager-agent state=absent'
ansible namenode-01.example.com -m ansible.builtin.yum -a 'name=cloudera-manager-server state=absent'
ansible namenode-01.example.com -m ansible.builtin.shell -a 'rm -rf /etc/cloudera-scm-server'
ansible hadoops -m ansible.builtin.yum -a 'name=cloudera-manager-daemons state=absent'
ansible hadoops -m ansible.builtin.shell -a 'rm -rf /var/lib/cloudera-scm-server'
ansible hadoops -m ansible.builtin.shell -a 'rm -rf /var/lib/cloudera-scm-agent'
ansible hadoops -m ansible.builtin.shell -a 'rm -rf /etc/cloudera-scm-agent'
ansible hadoops -m ansible.builtin.shell -a 'rm -rf /opt/cloudera/cm-agent'
## Dropping the databases and users created for Cloudera software
# NOTE(review): the line below appears to record the MySQL root password typed
# at the `mysql -u root -p` prompt; it is not a command — confirm.
# The statements after `mysql -u root -p` are entered in the mysql client.
QpChsiajrfrd^XwI3gF1
mysql -u root -p
DROP DATABASE scm;
DROP user 'scm'@'%';
DROP DATABASE amon;
DROP user 'amon'@'%';
DROP DATABASE rman;
DROP user 'rman'@'%';
DROP DATABASE hue;
DROP user 'hue'@'%';
DROP DATABASE metastore;
DROP user 'hive'@'%';
DROP DATABASE sentry;
DROP user 'sentry'@'%';
DROP DATABASE nav;
DROP user 'nav'@'%';
DROP DATABASE navms;
DROP user 'navms'@'%';
DROP DATABASE oozie;
DROP user 'oozie'@'%';
SHOW DATABASES;
SHOW GRANTS FOR 'scm'@'%';
|