IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> Python知识库 -> fusai数据 -> 正文阅读

[Python知识库]fusai数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import warnings
import seaborn as sns
import scipy.stats as st
import gc

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth', 100)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# d.date_range("2018/01/01", "2018/01/10", freq="D")
path = 'traina/'  
path1 = 'trainb/'
cust_info = pd.read_csv(path + '网约房平台入住人表.csv')
cust_info1 = pd.read_csv(path1 + '网约房平台入住人表.csv')
cust = pd.concat([cust_info, cust_info1])
cust.head(2)
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIME
007C5BF73B18B44B0877DEED007F8771DNaN19800627222405NaNNaN
13525D57CAE104A078E4962B2B89377B0371099C301202107090001199510253705232.021071e+112.021071e+11
gc.collect()
17
cust['BDATE'] = cust['BDATE'].astype('str')cust['XZQH'] = cust['XZQH'].astype('str')cust['IN_TIME'] = cust['IN_TIME'].fillna(0).astype('str').apply(lambda x: x[:12])cust['OUT_TIME'] = cust['OUT_TIME'].fillna(0).astype('str').apply(lambda x: x[:12])
cust.head(3)
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIME
007C5BF73B18B44B0877DEED007F8771DNaN198006272224050.00.0
13525D57CAE104A078E4962B2B89377B0371099C30120210709000119951025370523202107092103202107111158
2A1414E7D71E34E1DA743AC08BCF49151371002182220200824000119860311370781202008241401202008251200
# cust['in_hour'] = cust['IN_TIME'].apply(lambda x:x[-4:-2])# cust['in_minute'] = cust['IN_TIME'].apply(lambda x:x[-2:])# cust['in_time'] = cust['IN_TIME'].apply(lambda x:x[:8])# cust['out_hour'] = cust['OUT_TIME'].apply(lambda x:x[-4:-2])# cust['out_minute'] = cust['OUT_TIME'].apply(lambda x:x[-2:])# cust['out_time'] = cust['OUT_TIME'].apply(lambda x:x[:8])# cust['in_datetime'] = pd.to_datetime(cust['in_time'], errors='coerce')# cust['out_datetime'] = pd.to_datetime(cust['out_time'], errors='coerce')
cust.info()
<class 'pandas.core.frame.DataFrame'>Int64Index: 44030 entries, 0 to 3343Data columns (total 6 columns): #   Column            Non-Null Count  Dtype ---  ------            --------------  -----  0   ORDER_PRIMARY_ID  44030 non-null  object 1   GUEST_ID          5158 non-null   object 2   BDATE             44030 non-null  object 3   XZQH              44030 non-null  object 4   IN_TIME           44030 non-null  object 5   OUT_TIME          44030 non-null  objectdtypes: object(6)memory usage: 2.4+ MB
cust.nunique()
ORDER_PRIMARY_ID    43883GUEST_ID             3698BDATE                9503XZQH                 2659IN_TIME              3396OUT_TIME              236dtype: int64
cust['ORDER_PRIMARY_ID'].value_counts()[10:15]
134B74870F1D4087AB3F590D2A0AAFDC    23370A2886D8F4E11860A00FEB2289E56    223D42363F202408BA04CD3D5BBAE5508    219631D36ABA448D9B0AECF0A7200C4C5    29373325C891E42CB8A8F43AF14939954    2Name: ORDER_PRIMARY_ID, dtype: int64
cust.sort_values(['ORDER_PRIMARY_ID', 'GUEST_ID'], inplace=True)
stat = cust.groupby(['ORDER_PRIMARY_ID'])['BDATE'].count().reset_index()stat.columns = ['ORDER_PRIMARY_ID', 'guest_sum']stat['guest_sum_notnull'] = cust.groupby(['ORDER_PRIMARY_ID'])['GUEST_ID'].agg('count').valuescust = cust.merge(stat, on=['ORDER_PRIMARY_ID'], how='left')
cust.drop_duplicates('ORDER_PRIMARY_ID', keep='first', inplace=True)
cust[cust['ORDER_PRIMARY_ID']=='70798782D6C04A438360D80AFE4845C1']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
2280170798782D6C04A438360D80AFE4845C1371099B389202107040001199903141309830.00.032
cust[cust['ORDER_PRIMARY_ID']=='5B94671A9390437E8E79A86D4B15A076']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
186315B94671A9390437E8E79A86D4B15A076371098A226202107010001200001143723240.00.021
cust[cust['ORDER_PRIMARY_ID']=='479006B921F24969B60B68DAEDA91909']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
14578479006B921F24969B60B68DAEDA9190937100209822020100200011984101321010320201002160120201006120022
cust[cust['ORDER_PRIMARY_ID']=='1772C1DEE3F141D8960BB9044CA4CF3A']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
47531772C1DEE3F141D8960BB9044CA4CF3A371098A783202107050001196209164222260.00.032
cust[cust['ORDER_PRIMARY_ID']=='BD39A40287CE4E76B46D3BC85BFA1298']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
38447BD39A40287CE4E76B46D3BC85BFA1298371099A428202105100004197102193702040.020210521183421

#前4位到市,前6位到县# cust_info['XZ_4'] = cust_info['XZQH'].apply(lambda x:x//100)
# cust_info['birth_date'] = pd.to_datetime(cust_info['BDATE'].astype(str), errors='coerce')# cust_info['birth_year'] = cust_info['birth_date'].dt.year.astype(int)# cust_info['birth_year'] = cust_info['birth_date'].dt.month.astype(int)# cust_info['birth_year'] = cust_info['birth_date'].dt.day.astype(int)# cust_info['birth_year'] = cust_info['birth_date'].dt.quarter.astype(int)#cust_info['birth_year'] = pd['birth_date'].dt.years#cust_info['birth_year'] = pd['birth_date'].dt.years
# cust_info['in_time_year'] = cust_info['IN_TIME'].apply(lambda x: x//10**8)# cust_info['in_time_month'] = cust_info['IN_TIME'].apply(lambda x: x//10**6 - (x//10**8)*10**2)# cust_info['in_time_day'] = cust_info['IN_TIME'].apply(lambda x: x//10**4 - (x//10**6)*10**2)# cust_info['in_time_hour'] = cust_info['IN_TIME'].apply(lambda x: x//10**2 - (x//10**4)*10**2)# cust_info['in_time_minute'] = cust_info['IN_TIME'].apply(lambda x: x - (x//10**2)*10**2)# cust_info['out_time_year'] = cust_info['OUT_TIME'].apply(lambda x: x//10**8)# cust_info['out_time_month'] = cust_info['OUT_TIME'].apply(lambda x: x//10**6 - (x//10**8)*10**2)# cust_info['out_time_day'] = cust_info['OUT_TIME'].apply(lambda x: x//10**4 - (x//10**6)*10**2)# cust_info['out_time_hour'] = cust_info['OUT_TIME'].apply(lambda x: x//10**2 - (x//10**4)*10**2)# cust_info['out_time_minute'] = cust_info['OUT_TIME'].apply(lambda x: x - (x//10**2)*10**2)
# cust_info['check_in_day'] = (cust_info['out_time_year'] - cust_info['in_time_year'])*365 + (cust_info['out_time_month'] - cust_info['in_time_month'])*30 + (cust_info['out_time_day'] - cust_info['in_time_day'])# cust_info['check_in_day'] = cust_info['check_in_day'].apply(lambda x: x if (x == np.nan) else x+1)
# cust_info['check_in_hour'] = (cust_info['out_time_year'] - cust_info['in_time_year'])*365*30*24 + (cust_info['out_time_month'] - cust_info['in_time_month'])*30*24 + (cust_info['out_time_day'] - cust_info['in_time_day'])*24 + (cust_info['out_time_hour'] - cust_info['in_time_hour'])# cust_info['check_in_hour'] = cust_info['check_in_hour'].apply(lambda x: x if (x == np.nan and x > 0) else x+24)
# cust_info.head()
# cust_info_1 = cust_info.groupby(['in_time_year', 'in_time_month'])['ORDER_PRIMARY_ID']
# 各个月份订单数量# cust_info_1.count()
cust['IN_TIME'].max()
'202110041413'
#cust_info['in_time'] = pd.to_datetime(map(cust_info['IN_TIME'].astype(int), str), errors='coerce')
# cust_info['IN_TIME'][2]

room_info = pd.read_csv(path + '网约房注册民宿.csv')
room_info.head(1)
CODEHOTELIDJYMJROOM_NUMBED_NUMFWLYCZLYCALLEDCITY_CODEBUR_CODESTA_CODESSXADDRESSMPHMJYQKFIRMDJSJBGSJSTATUSAUDITSTATUS
010017737108300020.01121【山海边公寓】海景大床房设施齐全空调做饭宽带应有尽371000000000NaNNaN371083乳山市长江路 银泰海景花园 55-301ROOM001【山海边公寓】海景大床房设施齐全空调做饭宽带应有尽32020-05-18 10:33:552020-07-15 10:23:58NaNNaN
room_info[(room_info['CALLED']==room_info['JYQK'])==False]
CODEHOTELIDJYMJROOM_NUMBED_NUMFWLYCZLYCALLEDCITY_CODEBUR_CODESTA_CODESSXADDRESSMPHMJYQKFIRMDJSJBGSJSTATUSAUDITSTATUS
room_info['CALLED'].nunique()
4232
room_info['CALLED'].value_counts()[:5]
高铁北站看海公寓                       24韩乐坊/不夜城美食街/乐天世纪城 豪华大床房 景驰公寓    20国际海水浴场精致海景大床房                  12九龙湾豪华一线海景loft复式                10高铁北站看海民宿                       10Name: CALLED, dtype: int64
room_info['HOTELID'].nunique()
5307
room_info['CODE'].nunique()
5307
room_info.info()
<class 'pandas.core.frame.DataFrame'>RangeIndex: 5307 entries, 0 to 5306Data columns (total 20 columns): #   Column       Non-Null Count  Dtype  ---  ------       --------------  -----   0   CODE         5307 non-null   object  1   HOTELID      5307 non-null   object  2   JYMJ         5307 non-null   float64 3   ROOM_NUM     5307 non-null   int64   4   BED_NUM      5307 non-null   int64   5   FWLY         5307 non-null   int64   6   CZLY         5307 non-null   int64   7   CALLED       5307 non-null   object  8   CITY_CODE    5307 non-null   int64   9   BUR_CODE     2658 non-null   float64 10  STA_CODE     2658 non-null   object  11  SSX          5307 non-null   int64   12  ADDRESS      5307 non-null   object  13  MPHM         5307 non-null   object  14  JYQK         5307 non-null   object  15  FIRM         5307 non-null   int64   16  DJSJ         5307 non-null   object  17  BGSJ         5307 non-null   object  18  STATUS       2658 non-null   float64 19  AUDITSTATUS  2658 non-null   float64dtypes: float64(4), int64(7), object(9)memory usage: 829.3+ KB
room_info.nunique()
CODE           5307HOTELID        5307JYMJ            173ROOM_NUM         11BED_NUM          17FWLY              1CZLY              4CALLED         4232CITY_CODE         1BUR_CODE          7STA_CODE         54SSX               5ADDRESS        4850MPHM            584JYQK           4232FIRM              2DJSJ           3753BGSJ           3294STATUS            1AUDITSTATUS       1dtype: int64
room_info.drop(['FWLY', 'CITY_CODE', 'STATUS', 'AUDITSTATUS', 'JYQK'], axis=1, inplace=True)
room_info['FIRM'].value_counts()
3     342910    1878Name: FIRM, dtype: int64

# platform infomationorder_info = pd.read_csv(path + '网约平台旅客订单信息.csv') order1 = pd.read_csv(path1 + '网约平台旅客订单信息.csv')order = pd.concat([order_info, order1])
order.head(2)
ORDER_IDORDER_PRIMARY_IDHOTELIDPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMESTATUSCANCEL_TIMEINSERT_TIMEMODIFY_TIMEFIRM
092352196BBDB7CC049421C85826AE07020B1392783372020080112002020080212001.596120e+111NaN20200730224152202007302241523
1923696C72F20539AD1447D86CD1A8E5EAEC63A2829322020080414002020080612001.596121e+111NaN20200730225524202007302255243
order[order['ORDER_PRIMARY_ID']=='3CDEDB5E03534D379687645675898CA4']
ORDER_IDORDER_PRIMARY_IDHOTELIDPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMESTATUSCANCEL_TIMEINSERT_TIMEMODIFY_TIMEFIRM
3097044220273CDEDB5E03534D379687645675898CA41708982021080813582021081011582.021071e+113NaN20210712210153202107122101533
cust[cust['ORDER_PRIMARY_ID']=='3CDEDB5E03534D379687645675898CA4']
ORDER_PRIMARY_IDGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
124213CDEDB5E03534D379687645675898CA4371002B268202107280001198710311102240.020210802084211
order['PRE_IN_TIME'] = order['PRE_IN_TIME'].astype('str')order['PRE_OUT_TIME'] = order['PRE_OUT_TIME'].astype('str')# order['ORDER_TIME'] = order['ORDER_TIME'].astype('str')order['INSERT_TIME'] = order['INSERT_TIME'].astype('str')order['MODIFY_TIME'] = order['MODIFY_TIME'].astype('str')order['CANCEL_TIME'] = order['CANCEL_TIME'].astype('str')
order['ORDER_TIME'] = order['ORDER_TIME'].fillna(0).astype('str').apply(lambda x: x[:12])

##########################没有考虑包含取消时间的订单

order = order[(order['CANCEL_TIME']=='nan')]
order = order.sort_values(['ORDER_ID', 'MODIFY_TIME'])
order.drop_duplicates('ORDER_ID', keep='last', inplace=True)
order.shape
(29941, 11)
order[order['ORDER_ID']==4402846]
ORDER_IDORDER_PRIMARY_IDHOTELIDPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMESTATUSCANCEL_TIMEINSERT_TIMEMODIFY_TIMEFIRM
301414402846BC0E8E3602434EA6A5F29A6F6FF422331001772021071113582021071211582021071110321nan20210711140042202107111400423
order.info()
<class 'pandas.core.frame.DataFrame'>Int64Index: 29941 entries, 2 to 18590Data columns (total 11 columns): #   Column            Non-Null Count  Dtype ---  ------            --------------  -----  0   ORDER_ID          29941 non-null  int64  1   ORDER_PRIMARY_ID  29941 non-null  object 2   HOTELID           29941 non-null  object 3   PRE_IN_TIME       29941 non-null  object 4   PRE_OUT_TIME      29941 non-null  object 5   ORDER_TIME        29941 non-null  object 6   STATUS            29941 non-null  int64  7   CANCEL_TIME       29941 non-null  object 8   INSERT_TIME       29941 non-null  object 9   MODIFY_TIME       29941 non-null  object 10  FIRM              29941 non-null  int64 dtypes: int64(3), object(8)memory usage: 2.7+ MB
order.nunique()
ORDER_ID            29941ORDER_PRIMARY_ID    29941HOTELID              4801PRE_IN_TIME          6875PRE_OUT_TIME          729ORDER_TIME          26818STATUS                  3CANCEL_TIME             1INSERT_TIME         25432MODIFY_TIME         25432FIRM                    2dtype: int64
order_info['FIRM'].value_counts()
3     3202910     8521Name: FIRM, dtype: int64
order['STATUS'].value_counts()
1    261372     32213      583Name: STATUS, dtype: int64
#插入时间不等于修改时间,就是进行过修改过的订单,最后发现为空,等于没有修改过order[order['INSERT_TIME']!=order['MODIFY_TIME']]
ORDER_IDORDER_PRIMARY_IDHOTELIDPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMESTATUSCANCEL_TIMEINSERT_TIMEMODIFY_TIMEFIRM
#订单时间# order_info['ORDER_TIME'][2]
# order_info_cancel = order_info[~order_info['CANCEL_TIME'].isnull()]# order_info_cancel.head()
# order_info_cancel.shape
# order_info_cancel[order_info_cancel['CANCEL_TIME'] < order_info_cancel['PRE_IN_TIME']].shape
# cancel_index = order_info_cancel[order_info_cancel['CANCEL_TIME'] < order_info_cancel['PRE_IN_TIME']].index.tolist()
# order_info = order_info[~order_info.index.isin(cancel_index)]
# order_info.reset_index(inplace=True)
# order_info = order_info.drop(columns='index')
# order_info.tail()

合并数据

# order时间包含了cust时间df = pd.merge(order, cust, on='ORDER_PRIMARY_ID')

月份分为1、3、5、7、8、10、12:31天;2:28天;4、6、9、11:30天

df['IN_TIME'] = df['IN_TIME'].apply(lambda x: np.nan if x=='0.0' else x)df['OUT_TIME'] = df['OUT_TIME'].apply(lambda x: np.nan if x=='0.0' else x)df['IN_TIME'].fillna(df['PRE_IN_TIME'], inplace=True)df['OUT_TIME'].fillna(df['PRE_OUT_TIME'], inplace=True)
df['OUT_TIME'] = df.apply(lambda x: x['OUT_TIME'] if x['OUT_TIME'] >= x['PRE_OUT_TIME'] else x['PRE_OUT_TIME'], axis=1)df['IN_TIME'] = df.apply(lambda x: x['IN_TIME'] if x['IN_TIME'] >= x['PRE_IN_TIME'] else x['PRE_IN_TIME'], axis=1)
df.drop(['PRE_IN_TIME', 'PRE_OUT_TIME', 'CANCEL_TIME'], axis=1, inplace=True)
df.head(2)
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnull
0706648A4AACE06B518418C8A8CA1935DDD1C5A227185202007092241120200714171021202007141710213NaN2001040937100220200715130020200716120010
174864767D551AACFD049AE9CC2AB65E98706789483202007132109120201002101040202010021010403NaN1988102334120220201001140020201002101110
df.info()
<class 'pandas.core.frame.DataFrame'>Int64Index: 29941 entries, 0 to 29940Data columns (total 15 columns): #   Column             Non-Null Count  Dtype ---  ------             --------------  -----  0   ORDER_ID           29941 non-null  int64  1   ORDER_PRIMARY_ID   29941 non-null  object 2   HOTELID            29941 non-null  object 3   ORDER_TIME         29941 non-null  object 4   STATUS             29941 non-null  int64  5   INSERT_TIME        29941 non-null  object 6   MODIFY_TIME        29941 non-null  object 7   FIRM               29941 non-null  int64  8   GUEST_ID           3561 non-null   object 9   BDATE              29941 non-null  object 10  XZQH               29941 non-null  object 11  IN_TIME            29941 non-null  object 12  OUT_TIME           29941 non-null  object 13  guest_sum          29941 non-null  int64  14  guest_sum_notnull  29941 non-null  int64 dtypes: int64(5), object(10)memory usage: 3.7+ MB
df.nunique();
ORDER_ID             29941ORDER_PRIMARY_ID     29941HOTELID               4801ORDER_TIME           26818STATUS                   3INSERT_TIME          25432MODIFY_TIME          25432FIRM                     2GUEST_ID              3561BDATE                 9036XZQH                  2575IN_TIME               8953OUT_TIME               732guest_sum                3guest_sum_notnull        3dtype: int64
df['in_time'] = pd.to_datetime(df['IN_TIME'], errors='coerce', format='%Y%m%d%H%M')df['out_time'] = pd.to_datetime(df['OUT_TIME'], errors='coerce', format='%Y%m%d%H%M')
df = df[~df['out_time'].isnull()]
df.head(2)
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_time
0706648A4AACE06B518418C8A8CA1935DDD1C5A227185202007092241120200714171021202007141710213NaN20010409371002202007151300202007161200102020-07-15 13:00:002020-07-16 12:00:00
174864767D551AACFD049AE9CC2AB65E98706789483202007132109120201002101040202010021010403NaN19881023341202202010011400202010021011102020-10-01 14:00:002020-10-02 10:11:00
df['in_time_year'] = df['in_time'].dt.year.fillna(0).astype('int')                 #入住时间的年份df['out_time_year'] = df['out_time'].dt.year.fillna(0).astype('int')               #退房时间的年份df['in_time_month'] = df['in_time'].dt.month.fillna(0).astype('int')               #入住时间的月份df['out_time_month'] = df['out_time'].dt.month.fillna(0).astype('int')             #退房时间的月份df['in_time_day'] = df['in_time'].dt.day.fillna(0).astype('int')                   #入住时间的日期df['out_time_day'] = df['out_time'].dt.day.fillna(0).astype('int')                 #退房时间的日期df['in_time_hour'] = df['in_time'].dt.hour.fillna(0).astype('int')                 #入住时间的小时df['out_time_hour'] = df['out_time'].dt.hour.fillna(0).astype('int')               #退房时间的小时# df['in_time_day'] = df['in_time'].dt.day.fillna(0).astype('int')# df['out_time_day'] = df['out_time'].dt.day.fillna(0).astype('int')# df['in_time_day'] = df['in_time'].dt.day.fillna(0).astype('int')# df['out_time_day'] = df['out_time'].dt.day.fillna(0).astype('int')
#2月份没有df['day_gap'] = df['out_time_day'] - df['in_time_day'] + (df['out_time_month'] - df['in_time_month']).apply(lambda x: x*30 if x>0 else 0)
df[df['day_gap']>32]
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_timein_time_yearout_time_yearin_time_monthout_time_monthin_time_dayout_time_dayin_time_hourout_time_hourday_gap
915440816794DCD1BDDF65B425292A7B92C8E895F0D315187202106151752320210615175403202106151754033371002A67620210616000119890208371302202106161358202107311158112021-06-16 13:58:002021-07-31 11:58:0020212021671631131145
20135472656625EF2A4A528D468BAE986CF25E88A480282552202108121704120210813120007202108131200073NaN19650327230122202108131158202110121158102021-08-13 11:58:002021-10-12 11:58:00202120218101312111159
263563085175862AEF4D8DECE4715962AC0588536F3FEB1027170022020122819491202012281959042020122819590410NaN19990312411303202101060000202102280000102021-01-06 00:00:002021-02-28 00:00:0020212021126280052
# df[df['in_time_month']==12]['day_gap'] = df[df['in_time_month']==12]['day_gap'].apply(lambda x: x+31)df['day_gap'] = df.apply(lambda x: x['day_gap']+31 if x['in_time_month']==12 and x['out_time_month']==1 else x['day_gap'], axis=1)
month_ = [1, 3, 5, 7, 8, 10]df['day_gap'] = df.apply(lambda x: x['day_gap']+1 if x['in_time_month'] in month_ and x['out_time_month']>x['in_time_month'] else x['day_gap'], axis=1)
df['day_gap'].sum()
49261

#后移一天
df[‘lock_time’][0] + pd.Timedelta(days=1)
#前移一天
df[‘lock_time’][0] + pd.Timedelta(days=-1)
#df[‘in_time’] - timedelta(days=1) #将day_gap列为0的数据全部往后推迟一天;;;;;;;还有钟点房

# df_0 = df[(df['day_gap']==0)]# df_0['date'] = df_0['IN_TIME']# 这里边包含了钟点房和当天入住并退房的。
dfs = []for idx, group in df.groupby(['ORDER_PRIMARY_ID']):    stat = pd.DataFrame()    stat['date'] = pd.date_range(start=group['IN_TIME'].values[0], end=group['OUT_TIME'].values[0], freq='D', normalize=False, closed=None)    stat['ORDER_PRIMARY_ID'] = group['ORDER_PRIMARY_ID'].values[0]    dfs.append(stat)
# df_0 = df_0[['date', 'ORDER_PRIMARY_ID']]# dfs.append(df_0)
df_date = pd.concat(dfs).reset_index(drop=True)
df1 = df.merge(df_date, on=['ORDER_PRIMARY_ID'], how='left')
df1.shape
(60973, 27)
df1[(df1['out_time']!=df1['date']) & (df1['day_gap']!=0)].shape
(50058, 27)
df1[:10]
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_timein_time_yearout_time_yearin_time_monthout_time_monthin_time_dayout_time_dayin_time_hourout_time_hourday_gapdate
0706648A4AACE06B518418C8A8CA1935DDD1C5A227185202007092241120200714171021202007141710213NaN20010409371002202007151300202007161200102020-07-15 13:00:002020-07-16 12:00:0020202020771516131212020-07-15 13:00:00
174864767D551AACFD049AE9CC2AB65E98706789483202007132109120201002101040202010021010403NaN19881023341202202010011400202010021011102020-10-01 14:00:002020-10-02 10:11:0020202020101012141012020-10-01 14:00:00
275745543F69E6DFE004103BC4741C355D2A793275834202007141819220200714181929202007141819293371002136620200715000119920216370784202007151400202007161200112020-07-15 14:00:002020-07-16 12:00:0020202020771516141212020-07-15 14:00:00
375775442157BB31FD340519BB6C8DD073F2FBC8489202007141847120200714184726202007141847263NaN19860622370102202007151400202007171200102020-07-15 14:00:002020-07-17 12:00:0020202020771517141222020-07-15 14:00:00
475775442157BB31FD340519BB6C8DD073F2FBC8489202007141847120200714184726202007141847263NaN19860622370102202007151400202007171200102020-07-15 14:00:002020-07-17 12:00:0020202020771517141222020-07-16 14:00:00
57578298C0C98A722FC4A3DAEFC6296AC63FF84149513202007141854120200714185457202007141854573NaN19970731370983202007241400202007251200102020-07-24 14:00:002020-07-25 12:00:0020202020772425141212020-07-24 14:00:00
675851845B92315B9F44E59B383046A05429357149876202007142003120200714200342202007142003423NaN19790126370105202007191400202007211200102020-07-19 14:00:002020-07-21 12:00:0020202020771921141222020-07-19 14:00:00
775851845B92315B9F44E59B383046A05429357149876202007142003120200714200342202007142003423NaN19790126370105202007191400202007211200102020-07-19 14:00:002020-07-21 12:00:0020202020771921141222020-07-20 14:00:00
8759511CDE6BDDDE85044F7BAA53F377DFC012F10273202007142137120200714213718202007142137183NaN19840919370883202007161400202007191200102020-07-16 14:00:002020-07-19 12:00:0020202020771619141232020-07-16 14:00:00
9759511CDE6BDDDE85044F7BAA53F377DFC012F10273202007142137120200714213718202007142137183NaN19840919370883202007161400202007191200102020-07-16 14:00:002020-07-19 12:00:0020202020771619141232020-07-17 14:00:00
df1[(df1['HOTELID']=='149876') & (df1['in_time_month']==9)]
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_timein_time_yearout_time_yearin_time_monthout_time_monthin_time_dayout_time_dayin_time_hourout_time_hourday_gapdate
df1[df1['day_gap']==0][:2]
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_timein_time_yearout_time_yearin_time_monthout_time_monthin_time_dayout_time_dayin_time_hourout_time_hourday_gapdate
9451067205BD1B2089E4DA4D11A61852F393565F4B276992202008130010320200813001054202008130010543371002143820200814000119940402410102202008140817202008141200112020-08-14 08:17:002020-08-14 12:00:002020202088141481202020-08-14 08:17:00
20421369323D063FB8024EC432B89E7A9943EE8C5048845202009110304120200912141247202009121412473NaN19881128370103202009121400202009121413102020-09-12 14:00:002020-09-12 14:13:0020202020991212141402020-09-12 14:00:00

stat = pd.DataFrame()
stat[‘date’] = pd.date_range(
start=group[‘IN_TIME’].values[0],#开始时间
end=group[‘OUT_TIME’].values[0],#截止时间
periods=4,#总长度
freq=‘D’,#时间间隔
tz=None,#时区
normalize=False,#是否标准化到midnight
name=None,#date名称
closed=None,#首尾是否在内
**kwargs,
)

stat['ORDER_PRIMARY_ID'] = group['ORDER_PRIMARY_ID'].values[0]
stat
date
02021-06-30 13:58:00
12021-07-01 13:58:00
22021-07-02 13:58:00
32021-07-03 13:58:00

df[df['HOTELID']=='329845']
ORDER_IDORDER_PRIMARY_IDHOTELIDORDER_TIMESTATUSINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEguest_sumguest_sum_notnullin_timeout_timein_time_yearout_time_yearin_time_monthout_time_monthin_time_dayout_time_dayin_time_hourout_time_hourday_gap
198614682023B9DF5566F1A1436793714B423FF03526329845202108052135220210805213700202108052137003371002B74020210805000119940626230381202108052321202108061158112021-08-05 23:02:012021-08-06 11:05:0820212021885623111
199604698714CD3507B0EA2B4A95A1FF52677589FBD1329845202108081409220210808141123202108081411233371002B74020210808000119701008230184202108082122202108101158112021-08-08 21:02:022021-08-10 11:05:08202120218881021112
2035747548630013998FA8FB420D94A3AD0C16D46BA8329845202108161319220210826125548202108261255483371002B74020210826000119780323149001202108261805202108291158112021-08-26 18:00:052021-08-29 11:05:082021202188262918113
2037047567246D3512C9513145B48B35FFC5A98FAD5A329845202108161757220210816180002202108161800023371002B74020210816000119871210231222202108161903202108171158112021-08-16 19:00:032021-08-17 11:05:082021202188161719111
205924787023757875461A7C437AAAE942F0CE4E0043329845202108201739220210822103001202108221030013371002B74020210823000119920605430122202108222108202108231158112021-08-22 21:00:082021-08-23 11:05:082021202188222321111
20876481835435B453B2472C4812A1332BECED86275D329845202108241537320210824154233202108241542333371002B74020210825000119860313230231202108251158202108261158112021-08-25 11:05:082021-08-26 11:05:082021202188252611111
213184867480674C4B99BDA64E56892AAB65223C5533329845202108301202220210830123119202108301231193371002B74020210830000119940523370687202108302105202108311158112021-08-30 21:00:052021-08-31 11:05:082021202188303121111
213874875628A2F43E8280B249E5A9BDE08F5A5F2664329845202108311252220210831130914202108311309143371002B74020210901000119811024220381202109010857202109011158112021-09-01 08:05:072021-09-01 11:05:082021202199118110
room_info[room_info['CODE']=='329845']
CODEHOTELIDJYMJROOM_NUMBED_NUMCZLYCALLEDBUR_CODESTA_CODESSXADDRESSMPHMFIRMDJSJBGSJ
3051329845371002B7400.0231温馨精致观海房/loft名宿/孙家疃油画小镇/临近海水浴场3.710020e+11371000000001371002环翠区孙家疃望海·山公馆公寓13-16_B304室---__-~ROOM00132021-08-03 09:58:052021-08-03 10:01:02
df_month = df.groupby(['in_time_year', 'in_time_month'])['ORDER_ID']df_month.count()
in_time_year  in_time_month2020          7                 182              8                 885              9                1571              10               2226              11               1179              12               22792021          1                1596              2                 137              3                   5              4                 420              5                4893              6                3856              7                8507              8                2069              9                  99              10                 25              11                  1Name: ORDER_ID, dtype: int64




df['IN_TIME'].min(), df['IN_TIME'].max()
('202007151300', '202111022358')
df.info()
<class 'pandas.core.frame.DataFrame'>Int64Index: 29930 entries, 0 to 29940Data columns (total 26 columns): #   Column             Non-Null Count  Dtype         ---  ------             --------------  -----          0   ORDER_ID           29930 non-null  int64          1   ORDER_PRIMARY_ID   29930 non-null  object         2   HOTELID            29930 non-null  object         3   ORDER_TIME         29930 non-null  object         4   STATUS             29930 non-null  int64          5   INSERT_TIME        29930 non-null  object         6   MODIFY_TIME        29930 non-null  object         7   FIRM               29930 non-null  int64          8   GUEST_ID           3561 non-null   object         9   BDATE              29930 non-null  object         10  XZQH               29930 non-null  object         11  IN_TIME            29930 non-null  object         12  OUT_TIME           29930 non-null  object         13  guest_sum          29930 non-null  int64          14  guest_sum_notnull  29930 non-null  int64          15  in_time            29930 non-null  datetime64[ns] 16  out_time           29930 non-null  datetime64[ns] 17  in_time_year       29930 non-null  int32          18  out_time_year      29930 non-null  int32          19  in_time_month      29930 non-null  int32          20  out_time_month     29930 non-null  int32          21  in_time_day        29930 non-null  int32          22  out_time_day       29930 non-null  int32          23  in_time_hour       29930 non-null  int32          24  out_time_hour      29930 non-null  int32          25  day_gap            29930 non-null  int64         dtypes: datetime64[ns](2), int32(8), int64(6), object(10)memory usage: 5.3+ MB
df.nunique()
ORDER_ID             29930ORDER_PRIMARY_ID     29930HOTELID               4801ORDER_TIME           26807STATUS                   3INSERT_TIME          25421MODIFY_TIME          25421FIRM                     2GUEST_ID              3561BDATE                 9035XZQH                  2575IN_TIME               8953OUT_TIME               730guest_sum                3guest_sum_notnull        3in_time               8953out_time               730in_time_year             2out_time_year            2in_time_month           12out_time_month          12in_time_day             31out_time_day            31in_time_hour            23out_time_hour           15day_gap                 33dtype: int64
df.rename(columns={'HOTELID':'CODE'}, inplace=True)
df = pd.merge(df, room_info, on=['CODE', 'FIRM'])
df.head(2)
ORDER_IDORDER_PRIMARY_IDCODEPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMESTATUSCANCEL_TIMEINSERT_TIMEMODIFY_TIMEFIRMGUEST_IDBDATEXZQHIN_TIMEOUT_TIMEin_hourin_minutein_timeout_hourout_minuteout_timein_datetimeout_datetimeHOTELIDJYMJROOM_NUMBED_NUMCZLYCALLEDBUR_CODESTA_CODESSXADDRESSMPHMDJSJBGSJ
092352196BBDB7CC049421C85826AE07020B139278337202008011200202008021200159612011149.01nan20200730224152202007302241523NaN199308043208210.00.00.00.00.00.0NaTNaT37100A16200.0121金海滩临海落地窗公寓,北欧式装修NaNNaN371002环翠区高区金海滩街道新浪屿花园8号楼110411042020-07-06 17:59:352020-07-15 10:24:21
17612998923C26CC35F42C9A7C0A08BCC22179C278337202007221200202007251200202007150017.01nan20200715001752202007150017523NaN198106281201010.00.00.00.00.00.0NaTNaT37100A16200.0121金海滩临海落地窗公寓,北欧式装修NaNNaN371002环翠区高区金海滩街道新浪屿花园8号楼110411042020-07-06 17:59:352020-07-15 10:24:21
df.info()
<class 'pandas.core.frame.DataFrame'>Int64Index: 40043 entries, 0 to 40042Data columns (total 37 columns): #   Column            Non-Null Count  Dtype         ---  ------            --------------  -----          0   ORDER_ID          40043 non-null  int64          1   ORDER_PRIMARY_ID  40043 non-null  object         2   CODE              40043 non-null  object         3   PRE_IN_TIME       40043 non-null  object         4   PRE_OUT_TIME      40043 non-null  object         5   ORDER_TIME        40043 non-null  object         6   STATUS            40043 non-null  int64          7   CANCEL_TIME       40043 non-null  object         8   INSERT_TIME       40043 non-null  object         9   MODIFY_TIME       40043 non-null  object         10  FIRM              40043 non-null  int64          11  GUEST_ID          5131 non-null   object         12  BDATE             40043 non-null  object         13  XZQH              40043 non-null  object         14  IN_TIME           40043 non-null  object         15  OUT_TIME          40043 non-null  object         16  in_hour           40043 non-null  object         17  in_minute         40043 non-null  object         18  in_time           40043 non-null  object         19  out_hour          40043 non-null  object         20  out_minute        40043 non-null  object         21  out_time          40043 non-null  object         22  in_datetime       4913 non-null   datetime64[ns] 23  out_datetime      644 non-null    datetime64[ns] 24  HOTELID           40043 non-null  object         25  JYMJ              40043 non-null  float64        26  ROOM_NUM          40043 non-null  int64          27  BED_NUM           40043 non-null  int64          28  CZLY              40043 non-null  int64          29  CALLED            40043 non-null  object         30  BUR_CODE          20204 non-null  float64        31  STA_CODE          20204 non-null  object         32  SSX               40043 non-null  int64          33  ADDRESS           40043 non-null  object         34  MPHM              40043 non-null  object         35  DJSJ              40043 non-null  object         36  BGSJ              40043 non-null  object        dtypes: datetime64[ns](2), float64(2), int64(7), object(26)memory usage: 11.6+ MB
df.nunique()
ORDER_ID            29941ORDER_PRIMARY_ID    39901CODE                 4801PRE_IN_TIME          6932PRE_OUT_TIME          741ORDER_TIME          26892STATUS                  3CANCEL_TIME             1INSERT_TIME         35319MODIFY_TIME         35319FIRM                    2GUEST_ID             3675BDATE                9064XZQH                 2578IN_TIME              3380OUT_TIME              235in_hour                24in_minute              61in_time               264out_hour               15out_minute             25out_time              211in_datetime           263out_datetime          210HOTELID              4801JYMJ                  173ROOM_NUM               11BED_NUM                17CZLY                    4CALLED               3813BUR_CODE                7STA_CODE               53SSX                     5ADDRESS              4377MPHM                  534DJSJ                 3363BGSJ                 2871dtype: int64
df.columns
Index(['ORDER_ID', 'ORDER_PRIMARY_ID', 'CODE', 'PRE_IN_TIME', 'PRE_OUT_TIME',       'ORDER_TIME', 'STATUS', 'CANCEL_TIME', 'INSERT_TIME', 'MODIFY_TIME',       'FIRM', 'GUEST_ID', 'BDATE', 'XZQH', 'IN_TIME', 'OUT_TIME', 'in_hour',       'in_minute', 'in_time', 'out_hour', 'out_minute', 'out_time',       'in_datetime', 'out_datetime', 'HOTELID', 'JYMJ', 'ROOM_NUM', 'BED_NUM',       'CZLY', 'CALLED', 'BUR_CODE', 'STA_CODE', 'SSX', 'ADDRESS', 'MPHM',       'DJSJ', 'BGSJ'],      dtype='object')

删除HOTELID,CANCEL_TIME

df1 = df[['ORDER_ID', 'CODE', 'PRE_IN_TIME', 'PRE_OUT_TIME',       'ORDER_TIME', 'INSERT_TIME', 'MODIFY_TIME', 'IN_TIME', 'OUT_TIME', 'JYMJ', 'ROOM_NUM', 'BED_NUM', 'STATUS']]

0-撤销;1-预定;2-入住;3-离开

df1['STATUS'].value_counts()
1    344612     49213      661Name: STATUS, dtype: int64
df1.head(50)
ORDER_IDCODEPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMEINSERT_TIMEMODIFY_TIMEIN_TIMEOUT_TIMEJYMJROOM_NUMBED_NUMSTATUS
0923521278337202008011200202008021200159612011149.020200730224152202007302241520.00.00.0121
1761299278337202007221200202007251200202007150017.020200715001752202007150017520.00.00.0121
2943703278337202008091200202008121200202008012010.020200801201019202008012010190.00.00.0121
31012389278337202008141200202008171200202008080900.020200808090100202008080901000.00.00.0121
41351873278337202010051200202010061200202009091245.020200909124547202009091245470.00.00.0121
51481730278337202010051200202010071200202009221039.020200922103911202009221039112020100516242020100712000.0123
61516263278337202010011200202010041200202009251502.020200925150231202009251502310.00.00.0121
71539785278337202010041200202010051200202009271942.020200927194258202009271942580.00.00.0121
81568043278337202010041200202010051200202009301837.020200930183801202009301838010.00.00.0121
91830343278337202010311200202011011200202010301016.020201031120256202010311202560.00.00.0121
101830343278337202010311200202011011200202010301016.020201030101655202010301016550.00.00.0121
111836522278337202010301200202010311200202010301933.020201030193317202010301933170.00.00.0121
121850449278337202011011200202011021200202011010951.020201101121128202011011211280.00.00.0121
131850449278337202011011200202011021200202011010951.020201101095111202011010951110.00.00.0121
142035663278337202011210022202011211200202011210022.020201121002216202011210022160.00.00.0121
152363298278337202101021200202101041200202012230028.020210102120800202101021208000.00.00.0121
162363298278337202101021200202101041200202012230028.020201223002859202012230028590.00.00.0121
172465555278337202101021200202101031200202101011632.020210101163250202101011632500.00.00.0121
182465555278337202101021200202101031200202101011632.020210102120701202101021207010.00.00.0121
193356024278337202104291158202105051158202104121231.020210429120002202104291200020.00.00.0121
203576969278337202105061158202105091158202104291925.020210506120002202105061200020.00.00.0121
213599404278337202105051158202105061158202105020724.020210505120008202105051200080.00.00.0121
223599404278337202105051158202105061158202105020724.020210502072638202105020726380.00.00.0121
233665000278337202106121158202106151158202105081926.020210508192825202105081928250.00.00.0121
243686275278337202105102004202105111158202105102004.020210510201002202105102010020.00.00.0121
253750952278337202105161158202105171158202105161150.020210516120228202105161202280.00.00.0121
263777571278337202105181904202105191158202105181904.020210518190621202105181906210.00.00.0121
273787561278337202105211158202105231158202105191621.020210519162332202105191623320.00.00.0121
283787561278337202105211158202105231158202105191621.020210521120006202105211200060.00.00.0121
293941122278337202106022128202106031158202106022128.020210602214001202106022140010.00.00.0121
303941122278337202106022128202106031158202106022128.020210602213051202106022130510.00.00.0121
313988599278337202106101158202106111158202106070140.020210610120016202106101200160.00.00.0121
323988599278337202106101158202106111158202106070140.020210607014246202106070142460.00.00.0121
334300717278337202107061158202107071158202107031110.020210706120003202107061200030.00.00.0121
344308464278337202107051158202107061158202107032053.020210703205553202107032055530.00.00.0121
354308464278337202107051158202107061158202107032053.020210705120003202107051200030.00.00.0121
36923696282932202008041400202008061200159612092325.020200730225524202007302255240.00.00.0231
371065945282932202008131400202008141200202008122227.020200812222725202008122227250.00.00.0231
381370082282932202009181400202009191200202009110933.020200911093350202009110933500.00.00.0231
391370328282932202009191400202009211200202009111005.020200911100510202009111005100.00.00.0231
401550691282932202010011400202010021200202009282156.020200928215621202009282156210.00.00.0231
411612645282932202010081400202010091200202010061132.020201006113229202010061132290.00.00.0231
424497012282932202107221358202107291158202107181945.020210718194727202107181947270.00.00.0231
434505766282932202107201358202107221158202107191521.020210719152358202107191523580.00.00.0231
444505766282932202107201358202107221158202107191521.020210720140022202107201400220.00.00.0231
45706648227185202007151300202007161200202007092241.020200714171021202007141710210.00.00.0111
46853709227185202007251300202007261200202007232243.020200723224348202007232243480.00.00.0111
471008589227185202008251300202008271200202008072052.020200807205244202008072052440.00.00.0111
481089505227185202008181300202008201200202008142213.020200814221323202008142213230.00.00.0111
491111054227185202008171300202008181200202008162352.020200816235228202008162352280.00.00.0111
df['CODE'].value_counts()[:10]
B106535897    420315790        278296093        222296537        187282520        177B105273690    142298705        124298707        122289326        116294473        116Name: CODE, dtype: int64
df['HOTELID'].value_counts()[:10]
37100A3821    420371098A808    27837100A9707    222371098B022    18737100A1898    17737100A3565    142371099A016    124371099A013    12237100A9501    11637100A8976    116Name: HOTELID, dtype: int64


df_g = df.groupby(['CODE', 'PRE_IN_TIME', 'PRE_OUT_TIME'])['ORDER_ID'].count().reset_index()
df_g.head()
CODEPRE_IN_TIMEPRE_OUT_TIMEORDER_ID
01001772021071113582021071211582
11001772021072413582021072611582
21001812021062713582021062911582
31001812021071913582021072011581
41001812021072213582021072311582

ORDER_ID有重复,MODIFY_TIME修改过,按照最后一次的修改确定时间

df1[df1['CODE']=='100177']
ORDER_IDCODEPRE_IN_TIMEPRE_OUT_TIMEORDER_TIMEINSERT_TIMEMODIFY_TIMEIN_TIMEOUT_TIMEJYMJROOM_NUMBED_NUMSTATUS
369374402846100177202107111358202107121158202107111032.020210711103458202107111034580.00.00.0111
369384402846100177202107111358202107121158202107111032.020210711140042202107111400420.00.00.0111
369394455098100177202107241358202107261158202107151446.020210724140006202107241400060.00.00.0111
369404455098100177202107241358202107261158202107151446.020210715144850202107151448500.00.00.0111
df1.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 40043 entries, 0 to 40042
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ORDER_ID      40043 non-null  int64  
 1   CODE          40043 non-null  object 
 2   PRE_IN_TIME   40043 non-null  object 
 3   PRE_OUT_TIME  40043 non-null  object 
 4   ORDER_TIME    40043 non-null  object 
 5   INSERT_TIME   40043 non-null  object 
 6   MODIFY_TIME   40043 non-null  object 
 7   IN_TIME       40043 non-null  object 
 8   OUT_TIME      40043 non-null  object 
 9   JYMJ          40043 non-null  float64
 10  ROOM_NUM      40043 non-null  int64  
 11  BED_NUM       40043 non-null  int64  
 12  STATUS        40043 non-null  int64  
dtypes: float64(1), int64(4), object(8)
memory usage: 4.3+ MB












  Python知识库 最新文章
Python中String模块
【Python】 14-CVS文件操作
python的panda库读写文件
使用Nordic的nrf52840实现蓝牙DFU过程
【Python学习记录】numpy数组用法整理
Python学习笔记
python字符串和列表
python如何从txt文件中解析出有效的数据
Python编程从入门到实践自学/3.1-3.2
python变量
上一篇文章      下一篇文章      查看所有文章
加:2022-02-26 11:26:58  更:2022-02-26 11:27:33 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年12日历 -2024/12/24 11:43:39-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码
数据统计