# -*- coding: utf-8 -*-
# !/usr/bin/python3
import logging
import pymysql
import psycopg2
import pandas as pd
logging.basicConfig(level=logging.INFO,
filename='./log.txt',
filemode='w',
format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s')
HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USERNAME = 'mysql_username'
MYSQL_PASSWORD = 'mysql_password'
MYSQL_DB = 'database_name'
PG_PORT = 5432
PG_USERNAME = 'pg_username'
PG_PASSWORD = 'pg_password'
PG_DB = 'database_name'
# 打开数据库连接
mysql_conn = pymysql.connect(host=HOST, port=MYSQL_PORT, user=MYSQL_USERNAME, passwd=MYSQL_PASSWORD, db=MYSQL_DB)
pg_conn = psycopg2.connect(database=PG_DB, user=PG_USERNAME, password=PG_PASSWORD, host=HOST, port=PG_PORT)
# 使用 cursor() 方法创建一个游标对象 cursor
mysql_cursor = mysql_conn.cursor()
pg_cursor = pg_conn.cursor()
# 使用 execute() 方法执行 SQL 查询:两个数据库中bct的表
# 其中mysql只能获取到表,pg可以获取到表及表的记录数
mysql_cursor.execute("select table_name from information_schema.tables where table_schema = '" + MYSQL_DB + "';")
pg_cursor.execute("SELECT schemaname,relname,n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC;")
# 表数据fetchall
mysql_data = mysql_cursor.fetchall()
pg_data = pg_cursor.fetchall()
# 转换成list形式
mysql_data = list(mysql_data)
pg_data = list(pg_data)
# 转换为dataframe
mysql_df = pd.DataFrame(mysql_data, columns=["table_name"])
pg_df = pd.DataFrame(pg_data, columns=["schemaname", "relname", "n_live_tup"])
# 储存mysql中bct的所有表和表的记录数 [{table:count},{table:count}……]
mysql_list = []
# 相等的表数
eq_count = 0
# 不相等的表数
neq_count = 0
logging.info('--------------------------------')
for index, mysql_row in mysql_df.iterrows():
# 在此处循环搜索每个表的数据量,然后放入一个map中
sql = "select count(*) as count from " + MYSQL_DB + "." + mysql_row['table_name'] + ";"
mysql_cursor.execute(sql)
mysql_count = pd.DataFrame(mysql_cursor.fetchone(), columns=['count'])['count'][0]
mysql_list.append({mysql_row['table_name']: mysql_count})
for index, pg_row in pg_df.iterrows():
if pg_row['relname'].upper() == mysql_row['table_name'].upper():
pgcount = pg_row['n_live_tup']
if pgcount == mysql_count:
eq_count = eq_count + 1
logging.info(
'PG_table: ' + pg_row['relname'] + ' --> table_count is ' + str(pgcount) + ' == MYSQL_table: ' +
mysql_row['table_name'] + ' --> table_count is ' + str(mysql_count))
print('PG_table: ' + pg_row['relname'] + ' --> table_count is ' + str(pgcount) + ' == MYSQL_table: ' +
mysql_row['table_name'] + ' --> table_count is ' + str(mysql_count))
else:
neq_count = neq_count + 1
logging.info(
'PG_table: ' + pg_row['relname'] + ' --> table_count is ' + str(pgcount) + ' != MYSQL_table: ' +
mysql_row['table_name'] + ' --> table_count is ' + str(mysql_count))
print('PG_table: ' + pg_row['relname'] + ' --> table_count is ' + str(pgcount) + ' != MYSQL_table: ' +
mysql_row['table_name'] + ' --> table_count is ' + str(mysql_count))
logging.info('--------------------------------')
logging.info('相等的表数为:' + str(eq_count))
logging.info('不相等的表数为:' + str(neq_count))
logging.info('--------------------------------')
print('相等的表数为:' + str(eq_count))
print('不相等的表数为:' + str(neq_count))
# 关闭数据库连接
mysql_conn.close()
pg_conn.close()
print('over')
|