import os
import time
from datetime import datetime
import pandas as pd
from clickhouse_driver import Client
# Load every file in DATA_DIR into the ClickHouse ``user`` table.
#
# Each input line is expected to contain five comma-separated fields:
# user id, item id, category id, behaviour type and a Unix timestamp
# (seconds). The behaviour type is expanded into four 0/1 flag columns
# (pv, fav, cart, buy).
# NOTE(review): the ``user`` table schema is not visible here; the column
# types are assumed to accept ints and a datetime -- confirm against the DDL.

DATA_DIR = '/home/lin/data'
INSERT_SQL = "insert into user(user_id,item_id,cate_id,pv,fav,cart,buy,times) values"


def _parse_line(line):
    """Convert one comma-separated input line into a row dict.

    Args:
        line: A text line of the form
            ``user_id,item_id,cate_id,behaviour,timestamp``.

    Returns:
        A dict keyed by the column names used in ``INSERT_SQL``. Exactly one
        of ``pv``/``fav``/``cart``/``buy`` is 1 when the behaviour field is
        one of those four values; otherwise all four are 0. ``times`` is a
        naive ``datetime`` in the machine's local timezone.

    Raises:
        IndexError: If the line has fewer than five fields.
        ValueError: If an id or the timestamp is not an integer.
    """
    fields = line.strip().split(",")
    behavior = fields[3]
    return {
        'user_id': int(fields[0]),
        'item_id': int(fields[1]),
        'cate_id': int(fields[2]),
        # Each flag is derived from its own behaviour value so that the
        # columns can never mirror one another.
        'pv': int(behavior == 'pv'),
        'fav': int(behavior == 'fav'),
        'cart': int(behavior == 'cart'),
        'buy': int(behavior == 'buy'),
        # Epoch seconds -> local naive datetime, without a string round trip.
        'times': datetime.fromtimestamp(int(fields[4])),
    }


client = Client(host='hadoop102', port='9000', database='alibaba')

for filename in os.listdir(DATA_DIR):
    # ``with`` guarantees the file handle is closed even if parsing fails.
    with open(os.path.join(DATA_DIR, filename), "r") as file:
        # Iterate lazily and skip blank lines (e.g. a trailing newline).
        datalist = [_parse_line(item) for item in file if item.strip()]
    # One insert per file; each batch is sent exactly once, and an empty
    # file does not trigger a round trip to the server.
    if datalist:
        client.execute(INSERT_SQL, datalist, types_check=True)

# Lists the tables of the connected database; the result is not used here.
client.execute("show tables")
# For reference, see the official tutorial on using ClickHouse from Python ("Python操作ClickHouse").