"""Dump article text fields from Presto into a tab-separated file.

Reads every row of ``dim_content_7days``, joins the title, nickname, class
name and editor name of each row into a single ``title`` string, writes the
resulting ``content_id`` / ``title`` pairs to
``/service/data/articleSimilarity.csv`` and prints the elapsed time.
"""
import time

# The timer is started before the third-party imports below, so the reported
# duration covers their import cost as well as the query and the file write.
# perf_counter() is monotonic, so the difference cannot be skewed by system
# clock adjustments the way time.time() can.
st = time.perf_counter()
import pandas as pd
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *

PRESTO_URL = 'presto://xx.xx.x.xxx:6030'

# "order by 2" is positional: it sorts on the second column of the "select *"
# result. The select list is therefore left untouched -- narrowing it to the
# columns used below would silently change the sort key.
QUERY = """select *
from dim_content_7days
order by 2 desc
"""

# Columns pulled out of the query result. All of them must exist in the table,
# otherwise the .loc selection raises KeyError.
# NOTE(review): subject_name is selected but never used in the combined title
# -- confirm whether it was meant to be part of TITLE_PARTS.
KEPT_COLUMNS = ['content_id', 'title', 'nickname', 'subject_name',
                'val_class_name', 'val_editor_name']

# Columns concatenated, in this order and without a separator, into "title".
TITLE_PARTS = ['title', 'nickname', 'val_class_name', 'val_editor_name']

# Server-side destination. For a local run, point this at
# "articleSimilarity.csv" in the working directory instead.
OUTPUT_PATH = "/service/data/articleSimilarity.csv"

engine = create_engine(PRESTO_URL)

wenzhang = pd.read_sql(QUERY, engine)

# Replace missing values with empty strings so they contribute nothing to the
# concatenation. A non-inplace fillna avoids mutating a slice of the query
# result, which is what triggers pandas' SettingWithCopy diagnostics.
wenzhang = wenzhang.loc[:, KEPT_COLUMNS].fillna('')

# Build the combined text column; map(str) guards against non-string values.
combined_title = wenzhang[TITLE_PARTS[0]].map(str)
for part_name in TITLE_PARTS[1:]:
    combined_title = combined_title + wenzhang[part_name].map(str)

# Keep only the id and the combined text, in that column order.
wenzhang = wenzhang.loc[:, ['content_id']].assign(title=combined_title)

wenzhang.to_csv(OUTPUT_PATH, sep='\t', index=False)

et = time.perf_counter()
# The message reads "data fetch time: <seconds>s".
print("数据获取耗时:{}s".format(et - st))