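一.卸载 mariadb(卸载和mysql冲突的分支)
注意:yum remove 会把依赖 mariadb-libs 的软件包一并卸载(见下方 Dependency Removed 列表中的 hadoop、hive、impala 等),执行前请先确认这些组件可以被移除。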
[root@Linux121 guchenfei]# rpm -qa | grep mariadb
mariadb-libs-5.5.68-1.el7.x86_64
[root@Linux121 guchenfei]# yum remove mariadb-libs
.......
Removed:
mariadb-libs.x86_64 1:5.5.68-1.el7
Dependency Removed:
hadoop.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7 hadoop-0.20-mapreduce.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7
hadoop-client.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7 hadoop-hdfs.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7
hadoop-libhdfs.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7 hadoop-mapreduce.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7
hadoop-yarn.x86_64 0:2.6.0+cdh5.7.6+1551-1.cdh5.7.6.p0.7.el7 hbase.x86_64 0:1.2.0+cdh5.7.6+208-1.cdh5.7.6.p0.7.el7
hive.noarch 0:1.1.0+cdh5.7.6+685-1.cdh5.7.6.p0.7.el7 hive-jdbc.noarch 0:1.1.0+cdh5.7.6+685-1.cdh5.7.6.p0.7.el7
impala.x86_64 0:2.5.0+cdh5.7.6+0-1.cdh5.7.6.p0.7.el7 impala-server.x86_64 0:2.5.0+cdh5.7.6+0-1.cdh5.7.6.p0.7.el7
parquet.noarch 0:1.5.0+cdh5.7.6+180-1.cdh5.7.6.p0.7.el7 parquet-format.noarch 0:2.1.0+cdh5.7.6+18-1.cdh5.7.6.p0.7.el7
postfix.x86_64 2:2.10.1-9.el7 redhat-lsb-core.x86_64 0:4.1-27.el7.centos.1
sentry.noarch 0:1.5.1+cdh5.7.6+214-1.cdh5.7.6.p0.7.el7 solr.noarch 0:4.10.3+cdh5.7.6+430-1.cdh5.7.6.p0.7.el7
Complete!
二.安装依赖
安装版本:mysql57-community-release-el7-11.noarch.rpm
rpm -ivh mysql57-community-release-el7-11.noarch.rpm
安装python编译相关的依赖
yum install readline readline-devel -y
yum install gcc -y
yum install zlib* -y
yum install openssl openssl-devel -y
yum install sqlite-devel -y
yum install python3-devel mysql-devel
三.安装python环境
tar -zxvf Python-3.6.6.tgz
configure文件是脚本文件,如果配置了--prefix,安装后所有的资源文件都会放到这个目录中
./configure --prefix=/usr/local/python3.6
make && make install
/usr/local/python3.6/bin/pip3 install virtualenv
启动python环境
cd /usr/local/python3.6/bin/
./virtualenv env
. env/bin/activate (激活虚拟环境)
(env) [root@Linux121 bin]# python
Python 3.6.6 (default, Jan 21 2022, 10:27:11)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-44)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> exit()
(env) [root@Linux121 bin]# python -V
Python 3.6.6
四.安装Airflow(切记在上边Python 3.6.6虚拟环境中安装会容易些)
与Python环境相关的命令需要在Python 3.6.6虚拟环境中执行;操作系统相关的命令在哪里执行都可以,没有影响。
环境变量配置
export AIRFLOW_HOME=/opt/lagou/servers/airflow
#使用豆瓣源非常快. -i:指定库的安装源(python源)
pip install apache-airflow==1.10.11 -i https://pypi.douban.com/simple
创建数据库用户并授权
create database airflowLinux122;
创建用户airflow,设置所有ip均可访问
set global validate_password_policy=LOW;
set global validate_password_length=6;
create user 'airflow'@'%' identified by '123456';
create user 'airflow'@'localhost' identified by '123456';
用户授权
grant all on airflowLinux122.* to 'airflow'@'%';
set global explicit_defaults_for_timestamp = 1;
flush privileges;
五.修改Airflow DB 配置
pip install mysqlclient==1.4.6
出现问题如下
(env) [root@Linux121 bin]# pip install mysqlclient==1.4.6
Collecting mysqlclient==1.4.6
Using cached mysqlclient-1.4.6.tar.gz (85 kB)
Preparing metadata (setup.py) ... error
ERROR: Command errored out with exit status 1:
command: /usr/local/python3.6/bin/env/bin/python -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/setup.py'"'"'; __file__='"'"'/tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-pip-egg-info-6pa2rfvb
cwd: /tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/
Complete output (12 lines):
/bin/sh: mysql_config: command not found
/bin/sh: mariadb_config: command not found
/bin/sh: mysql_config: command not found
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/setup.py", line 16, in <module>
metadata, options = get_config()
File "/tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/setup_posix.py", line 61, in get_config
libs = mysql_config("libs")
File "/tmp/pip-install-818xaeka/mysqlclient_964dddc6ad69476c80328338febb4f2e/setup_posix.py", line 29, in mysql_config
raise EnvironmentError("%s not found" % (_mysql_config_path,))
OSError: mysql_config not found
----------------------------------------
WARNING: Discarding https://files.pythonhosted.org/packages/d0/97/7326248ac8d5049968bf4ec708a5d3d4806e412a42e74160d7f266a3e03a/mysqlclient-1.4.6.tar.gz#sha256=f3fdaa9a38752a3b214a6fe79d7cae3653731a53e577821f9187e67cbecb2e16 (from https://pypi.org/simple/mysqlclient/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
ERROR: Could not find a version that satisfies the requirement mysqlclient==1.4.6 (from versions: 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5, 1.3.6, 1.3.7, 1.3.8, 1.3.9, 1.3.10, 1.3.11rc1, 1.3.11, 1.3.12, 1.3.13, 1.3.14, 1.4.0rc1, 1.4.0rc2, 1.4.0rc3, 1.4.0, 1.4.1, 1.4.2, 1.4.2.post1, 1.4.3, 1.4.4, 1.4.5, 1.4.6, 2.0.0, 2.0.1, 2.0.2, 2.0.3, 2.1.0rc1, 2.1.0)
ERROR: No matching distribution found for mysqlclient==1.4.6
查询mysql_config确实不存在
(env) [root@Linux121 bin]# whereis mysql_config
mysql_config:
(env) [root@Linux121 bin]# pip install pymssql -i https://pypi. douban.com/simple/
ERROR: Invalid requirement: 'douban.com/simple/'
Hint: It looks like a path. File 'douban.com/simple/' does not exist.
网上有一大片针对该问题的解决办法,其根本原因是安装mysql时依赖没有安装全。我是通过安装包方式安装的(mysql-5.7.26-1.el7.x86_64.rpm-bundle.tar),只对其中一部分包进行了rpm安装。下面讲讲我是怎么发现缺少哪个依赖的:
1.通过报错 /bin/sh: mysql_config: command not found
[root@Linux121 servers]# whereis mysql_config
发现为空,确实不存在
判断没安mysql-devel
yum install mysql-devel
[root@Linux121 usr]# yum install mysql-devel
Loaded plugins: fastestmirror, langpacks
Loading mirror speeds from cached hostfile
* base: mirrors.aliyun.com
* extras: mirrors.aliyun.com
* updates: mirrors.aliyun.com
Resolving Dependencies
--> Running transaction check
---> Package mariadb-devel.x86_64 1:5.5.68-1.el7 will be installed
--> Processing Dependency: mariadb-libs(x86-64) = 1:5.5.68-1.el7 for package: 1:mariadb-devel-5.5.68-1.el7.x86_64
--> Processing Dependency: libmysqlclient.so.18()(64bit) for package: 1:mariadb-devel-5.5.68-1.el7.x86_64
--> Running transaction check
---> Package mariadb-libs.x86_64 1:5.5.68-1.el7 will be installed
Removing mariadb-libs.x86_64 1:5.5.68-1.el7 - u due to obsoletes from installed mysql-community-libs-5.7.26-1.el7.x86_64
--> Restarting Dependency Resolution with new changes.
--> Running transaction check
---> Package mariadb-libs.x86_64 1:5.5.68-1.el7 will be installed
--> Processing Dependency: mariadb-libs(x86-64) = 1:5.5.68-1.el7 for package: 1:mariadb-devel-5.5.68-1.el7.x86_64
--> Processing Dependency: libmysqlclient.so.18()(64bit) for package: 1:mariadb-devel-5.5.68-1.el7.x86_64
--> Finished Dependency Resolution
Error: Package: 1:mariadb-devel-5.5.68-1.el7.x86_64 (base)
Requires: libmysqlclient.so.18()(64bit)
Error: Package: 1:mariadb-devel-5.5.68-1.el7.x86_64 (base)
Requires: mariadb-libs(x86-64) = 1:5.5.68-1.el7
You could try using --skip-broken to work around the problem
** Found 2 pre-existing rpmdb problem(s), 'yum check' output follows:
icedtea-web-1.7.1-2.el7_6.x86_64 has missing requires of java-1.8.0-openjdk
jline-1.0-8.el7.noarch has missing requires of java >= ('0', '1.5', None)
Error: Package: 1:mariadb-devel-5.5.68-1.el7.x86_64 (base)
Requires: libmysqlclient.so.18()(64bit)
Error: Package: 1:mariadb-devel-5.5.68-1.el7.x86_64 (base)
Requires: mariadb-libs(x86-64) = 1:5.5.68-1.el7
通过依赖查询网站 https://pkgs.org/ 查到所缺的依赖包,我在解压后的安装包中发现确实有它,直接安装即可,剩下的依赖同理。这样mysql-devel的依赖问题解决,安装成功,mysqlclient也随之顺利安装成功。
airflow initdb
进入自己配置的airflow路径,修改数据库相关信息
/opt/lagou/servers/airflow
vim airflow.cfg
74 sql_alchemy_conn = mysql://airflow:123456@Linux123:3306/airflowLinux122
再次
airflow initdb
六.安装airflow密码模块
pip install apache-airflow[password]
修改配置
注意修改对应节点下的配置例如[webserver]
[webserver]
authenticate = True
auth_backend = airflow.contrib.auth.backends.password_auth
添加密码文件
python命令,执行一遍,添加用户登录,设置口令(记得3.6.6下执行)
import airflow
from airflow import models,settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'airflow'
user.email = 'airflow@gmail.com'
user.password = 'airflow123'
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
七.启动服务
未进入python3环境记得先进入
1.启动scheduler调度器
airflow scheduler -D(-D表示后台启动)
(env) [root@Linux121 bin]# airflow scheduler -D
____________ _____________
____ |__( )_________ __/__ /________ __
____ /| |_ /__ ___/_ /_ __ /_ __ \_ | /| / /
___ ___ | / _ / _ __/ _ / / /_/ /_ |/ |/ /
_/_/ |_/_/ /_/ /_/ /_/ \____/____/|__/
[2022-01-23 14:07:08,273] {__init__.py:50} INFO - Using executor SequentialExecutor
同时在airflow目录下多了启动日志文件
[root@Linux121 airflow]# ls -ll
total 144
-rw-r--r-- 1 root root 37715 Jan 23 12:21 airflow.cfg
-rw-r--r-- 1 root root 87040 Jan 22 23:40 airflow.db
-rw-r--r-- 1 root root 0 Jan 23 14:07 airflow-scheduler.err
-rw-r--r-- 1 root root 613 Jan 23 14:07 airflow-scheduler.log
-rw-r--r-- 1 root root 1022 Jan 23 14:07 airflow-scheduler.out
-rw-r--r-- 1 root root 6 Jan 23 14:07 airflow-scheduler.pid
drwxr-xr-x 4 root root 52 Jan 23 14:07 logs
-rw-r--r-- 1 root root 2598 Jan 22 23:40 unittests.cfg
2.启动页面服务
airflow webserver -D
(env) [root@Linux121 bin]# airflow webserver -D
____________ _____________
____ |__( )_________ __/__ /________ __
____ /| |_ /__ ___/_ /_ __ /_ __ \_ | /| / /
___ ___ | / _ / _ __/ _ / / /_/ /_ |/ |/ /
_/_/ |_/_/ /_/ /_/ /_/ \____/____/|__/
[2022-01-23 14:12:24,314] {__init__.py:50} INFO - Using executor SequentialExecutor
[2022-01-23 14:12:24,315] {dagbag.py:396} INFO - Filling up the DagBag from /opt/lagou/servers/airflow/dags
/usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow/models/dag.py:1342: PendingDeprecationWarning: The requested task could not be added to the DAG because a task with task_id create_tag_template_field_result is already in the DAG. Starting in Airflow 2.0, trying to overwrite a task will raise an exception.
category=PendingDeprecationWarning)
Running the Gunicorn Server with:
Workers: 4 sync
Host: 0.0.0.0:8080
Timeout: 120
Logfiles: - -
=================================================================
同样在airflow目录下多了webserver相关的文件
[root@Linux121 airflow]# ls -ll
total 152
-rw-r--r-- 1 root root 37715 Jan 23 12:21 airflow.cfg
-rw-r--r-- 1 root root 87040 Jan 22 23:40 airflow.db
-rw-r--r-- 1 root root 0 Jan 23 14:07 airflow-scheduler.err
-rw-r--r-- 1 root root 613 Jan 23 14:07 airflow-scheduler.log
-rw-r--r-- 1 root root 1022 Jan 23 14:07 airflow-scheduler.out
-rw-r--r-- 1 root root 6 Jan 23 14:07 airflow-scheduler.pid
-rw-r--r-- 1 root root 0 Jan 23 14:12 airflow-webserver.err
-rw-r--r-- 1 root root 0 Jan 23 14:12 airflow-webserver.log
-rw-r--r-- 1 root root 6 Jan 23 14:12 airflow-webserver-monitor.pid
-rw-r--r-- 1 root root 0 Jan 23 14:12 airflow-webserver.out
-rw-r--r-- 1 root root 6 Jan 23 14:12 airflow-webserver.pid
drwxr-xr-x 4 root root 52 Jan 23 14:07 logs
-rw-r--r-- 1 root root 2598 Jan 22 23:40 unittests.cfg
稍等10s左右可以访问web页面 http://linux121:8080/
八.修改配置
1.修改时间
airflow.cfg
大约在65行
default_timezone = Asia/Shanghai
/usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow
(env) [root@Linux121 airflow]# cd utils/
(env) [root@Linux121 utils]# vim timezone.py
大约27行注释增加如下代码
from airflow import configuration
try:
    tz = configuration.conf.get("core", "default_timezone")
    if tz == "system":
        utc = pendulum.local_timezone()
    else:
        utc = pendulum.timezone(tz)
except Exception:
    pass
修改utcnow函数(注释掉大约72行处原有的两行,并增加如下代码)
#d = dt.datetime.utcnow()
#d = d.replace(tzinfo=utc)
#注意python对缩进格式比较苛刻,新增代码需与被注释的原代码保持相同缩进
d = dt.datetime.now()
d = d.replace(tzinfo=utc)
(env) [root@Linux121 utils]# vim sqlalchemy.py
在38行增加如下代码
from airflow import configuration
try:
    tz = configuration.conf.get("core", "default_timezone")
    if tz == "system":
        utc = pendulum.local_timezone()
    else:
        utc = pendulum.timezone(tz)
except Exception:
    pass
/usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow/www/templates/admin
(env) [root@Linux121 admin]# vim master.html
40 var UTCseconds = (x.getTime() + x.getTimezoneOffset()*60*1000);
改成: var UTCseconds = x.getTime();
43 "timeFormat":"H:i:s %UTC%",
改成: "timeFormat":"H:i:s",
重启airflow webserver
查看airflow相关服务进程
(env) [root@Linux121 admin]# ps -ef | grep airflow
root 25780 1 0 14:52 ? 00:00:27 /usr/local/python3.6/bin/env/bin/python /usr/local/python3.6/bin/env/bin/airflow scheduler -D
root 25785 25780 1 14:52 ? 00:02:25 airflow scheduler -- DagFileProcessorManager
root 26738 1 1 14:58 ? 00:03:15 /usr/local/python3.6/bin/env/bin/python /usr/local/python3.6/bin/env/bin/airflow webserver -D
root 26763 1 0 14:58 ? 00:00:06 gunicorn: master [airflow-webserver]
root 61067 26763 0 18:08 ? 00:00:00 [ready] gunicorn: worker [airflow-webserver]
root 61156 26763 0 18:09 ? 00:00:00 [ready] gunicorn: worker [airflow-webserver]
root 61249 26763 1 18:09 ? 00:00:00 [ready] gunicorn: worker [airflow-webserver]
root 61358 26763 2 18:10 ? 00:00:00 [ready] gunicorn: worker [airflow-webserver]
root 61427 25785 0 18:10 ? 00:00:00 airflow scheduler - DagFileProcessor /usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow/example_dags/example_bash_operator.py
root 61428 25785 0 18:10 ? 00:00:00 airflow scheduler - DagFileProcessor /usr/local/python3.6/bin/env/lib/python3.6/site-packages/airflow/example_dags/example_external_task_marker_dag.py
root 61430 6564 0 18:10 pts/0 00:00:00 grep --color=auto airflow
(env) [root@Linux121 admin]#
#关闭 airflow webserver 对应的服务
ps -ef | grep 'airflow-webserver' | grep -v 'grep' | awk '{print $2}' | xargs -i kill -9 {}
#关闭airflow scheduler对应的服务
ps -ef | grep 'airflow' | grep 'scheduler' | awk '{print $2}' | xargs -i kill -9 {}
删除对应的pid文件
/opt/lagou/servers/airflow
[root@Linux121 airflow]# rm -rf *.pid
重新启动
airflow scheduler -D
airflow webserver -D
此刻时间显示正常
九.禁用自带的DAG任务
停止任务同上
修改配置文件
[root@Linux121 airflow]# vim airflow.cfg
大约137行
137 load_examples = False
重置db
airflow resetdb -y
重置口令
import airflow
from airflow import models,settings
from airflow.contrib.auth.backends.password_auth import PasswordUser
user = PasswordUser(models.User())
user.username = 'airflow'
user.email = 'airflow@gmail.com'
user.password = 'airflow123'
session = settings.Session()
session.add(user)
session.commit()
session.close()
exit()
重启服务 ok