一、功能介绍
1、选择csv文件夹,遍历这个文件夹下的所有csv文件,csv文件名作为表名。这个文件夹下只能有csv文件,不能继续往下遍历文件夹。
2、可以设置表名对应的表注释 get_table_comment(),有需要自己设置。
3、可以设置csv表头对应的标准列名 get_en(),有需要自己设置。
打算出的功能: 1、直接把csv中的数据导入到MySQL表中 2、Hive版本
二、先上源码
import pandas as pd
import getpass
import os
import tkinter as tk
from tkinter import filedialog
def get_sql(table_path):
path = table_path.replace('\\', '/')
path_split = path.split('/')
table_name = path_split[len(path_split) - 1].split('.')[0]
try:
csv = get_unicode(path)
except UnicodeDecodeError as e:
notion = "类型选择错误,请重新选择\n" + str(e)
print(notion)
csv = get_unicode(path)
df = pd.DataFrame(csv)
columns = df.columns
print(columns)
comment = get_table_comment(table_name)
sql = "create table " + table_name + "(\n"
index = 0
for i in columns:
index += 1
cn_index = i
cn_index = get_en(cn_index)
if cn_index is None:
if index == len(columns):
sql = sql + " " + i + ' varchar(30)\n'
else:
sql = sql + " " + i + ' varchar(30),\n'
else:
if index == len(columns):
sql = sql + " " + cn_index + ' varchar(30)\n'
else:
sql = sql + " " + cn_index + ' varchar(30),\n'
char = ') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ' + 'comment=' + "'" + comment + "'" + ';\n\n\n'
sql_final = sql + char
print(sql_final)
user = getpass.getuser()
sql_path = "C:/Users/" + user + "/Desktop/result.sql"
with open(sql_path, encoding='utf-8', mode='a') as fp:
fp.write(sql_final)
fp.close()
print("SQL存储在 %s中" % sql_path)
def get_en(cn):
index = {
"日期": "date",
"汇率": "rate",
"部位": "part",
"国家": "country",
"厂号": "number",
"成交价格": "price",
"阿根廷": "AR",
"巴西": "BR",
"巴拉圭": "PY",
"乌拉圭": "UY",
"中国": "CN",
"中国总进口量": "CN_import",
"澳大利亚": "AU",
"新西兰": "NZ",
"智利": "CL",
"哥斯达黎加": "CR",
"哈萨克斯坦": "KZ",
"南非": "ZA",
"纳米比亚": "NA",
"俄罗斯": "RU",
"其他": "other",
"年份": "year",
"鸡全架": "chicken1",
"鸡脖": "chicken2",
"鸡翅": "chicken3",
"鸡翅尖": "chicken4",
"鸡翅中": "chicken5",
"鸡翅根": "chicken6",
"鸡大胸": "chicken7",
"鸡腿": "chicken8",
"鸡腿块": "chicken9",
"琵琶腿": "chicken10",
"鸡爪": "chicken11",
"鸡胗": "chicken12",
"鸡心": "chicken13",
"鸡肝": "chicken14",
"鸡边腿(美国)": "chicken15",
"数值": "ratio",
"巴西总出口量(不含下水)": "BR_beef_export0",
"巴西至中国大陆": "BR_CN",
"巴西至中国香港": "BR_HK",
"阿根廷总出口量": "AR_export",
"阿根廷至中国大陆": "AR_CN",
"乌拉圭总出口量(不含下水)": "UY_beef_export",
"乌拉圭至中国大陆(不含下水)": "UY_CN0",
"乌拉圭至中国大陆(下水)": "UY_CN1",
"澳大利亚总出口量": "AU_export",
"澳大利亚至中国大陆": "AU_CN",
"澳大利亚至日本": "AU_JP",
"澳大利亚至美国": "AU_US",
"澳大利亚至韩国": "AU_KR",
"澳大利亚至其他": "AU_other",
"新西兰总出口量": "NZ_export",
"新西兰至中国大陆": "NZ_CN",
"美国总出口量(含下水)": "US_beef_export1",
"美国牛肉出口量(不含下水)": "US_beef_export0",
"美国牛肉至中国大陆": "US_beef_CN",
"牛副产品": "offal",
"美国牛副至中国大陆": "US_offal_CN",
"巴拉圭总出口量": "PY_export",
"梅肉": "pig1",
"二号肉": "pig2",
"四号肉": "pig3",
"猪头": "pig4",
"猪手": "pig5",
"猪脚": "pig6",
"六分体": "pig7",
"带皮五花": "pig8",
"前段": "pig9",
"中段": "pig10",
"后段": "pig11",
"猪尾": "pig12",
"猪舌": "pig13",
"碎膘": "pig14",
"背膘": "pig15",
"前筒骨": "pig16",
"后筒骨": "pig17",
"肋排": "pig18",
"小排": "pig19",
"冷冻猪碎肉(墨西哥)": "pig20",
"美国": "US",
"西班牙": "ES",
"德国": "DE",
"加拿大": "CA",
"丹麦": "DK",
"荷兰": "NL",
"法国": "FR",
"猪肉": "pork",
"牛肉": "beef",
"羊肉": "lamb",
"鸡肉": "chicken"
}
return index.get(cn, None)
def get_table_comment(table_name):
table_comment = {
"market_quotations_AR_US_dollar_exchange_rate": "阿根廷美元汇率",
"market_quotations_BR_US_dollar_exchange_rate": "巴西雷亚尔美元汇率",
"market_quotations_beef_exports_price_index": "海外牛肉出口价格指数",
"market_quotations_beef_exports": "海外牛肉出口量",
"market_quotations_chicken": "鸡肉各部位现货价格",
"market_quotations_beef": "牛肉各部位现货价格",
"market_quotations_CN_US_dollar_exchange_rate": "人民币美元汇率",
"market_quotations_CAB_CPI": "中国巴西阿根廷cpi指数",
"market_quotations_CAB_GDP": "中国巴西阿根廷年度GDP增速",
"market_quotations_beef_imports": "中国大陆牛肉进口量",
"market_quotations_pork_imports": "中国大陆猪肉进口量",
"market_quotations_hog_price_vs_corn_price": "猪粮比指数",
"market_quotations_wholesale_price": "猪牛羊鸡批发价格",
"market_quotations_pig": "猪肉各部位现货价格"
}
return table_comment.get(table_name, None)
def get_unicode(path):
csv = pd.read_csv(path, encoding='gb2312')
return csv
def get_table_path():
root = tk.Tk()
root.withdraw()
path = filedialog.askdirectory()
for root, dirs, files in os.walk(path):
for file in files:
table_path = root + '/' + file
get_sql(table_path)
if __name__ == '__main__':
get_table_path()
三、实现细节介绍
1、选择文件夹的窗口
通过窗口选择文件夹
import tkinter as tk
from tkinter import filedialog
root = tk.Tk()
root.withdraw()
path = filedialog.askdirectory()
2、遍历文件夹所有csv文件
import os
for root, dirs, files in os.walk(path):
for file in files:
table_path = root + '/' + file
get_sql(table_path)
root:是文件夹路径 dirs:这个dirs不太懂是用来干嘛的,打印dirs是空的数据。 files:是文件夹下所有csv文件
3、获取对应列名,对应表注释
get_en()、get_table_comment() 使用了字典,字典名.get(Key,None) 如果有对应的,则返回对应的Value,如果没有对应的,返回None
4、编写SQL
4.1、先通过字符串处理,获取到表名,其实可以通过遍历的时候直接传过来
path = table_path.replace('\\', '/')
path_split = path.split('/')
table_name = path_split[len(path_split) - 1].split('.')[0]
4.2、设置读取csv的编码格式问题
有的是GBK,有的是utf-8,我这边给了一个选择 源码上我给写死了,解开注释就好了
get_sql():
try:
csv = get_unicode(path)
except UnicodeDecodeError as e:
notion = "类型选择错误,请重新选择\n" + str(e)
print(notion)
csv = get_unicode(path)
def get_unicode(path):
csv = pd.read_csv(path, encoding='gb2312')
return csv
后面就是一些比较细碎的创建SQL语句,可自行查看
4.3、将SQL写入到文件中
user = getpass.getuser()
sql_path = "C:/Users/" + user + "/Desktop/result.sql"
with open(sql_path, encoding='utf-8', mode='a') as fp:
fp.write(sql_final)
fp.close()
print("SQL存储在 %s中" % sql_path)
五、结果图
|