%sh
mkdir -p /opt/hivedemo/data/
show databases;
%sh
cd /opt/hivedemo/data
ls
echo '---------------------------'
wc -l transaction_details.csv
echo '--------------------------'
wc -l store_review.csv
head -2 store_review.csv
echo '--------------------------'
wc -l store_details.csv
head -6 store_details.csv
echo '---------------------------'
wc -l customer_details.csv
head -3 customer_details.csv
%sh
cd /opt/hivedemo/data
hdfs dfs -mkdir -p /opt/hivedemo/data/customer
hdfs dfs -mkdir -p /opt/hivedemo/data/transaction
hdfs dfs -mkdir -p /opt/hivedemo/data/store
hdfs dfs -mkdir -p /opt/hivedemo/data/review
hdfs dfs -ls /opt/hivedemo/data
hdfs dfs -put ./customer_details.csv /opt/hivedemo/data/customer
hdfs dfs -put ./transaction_details.csv /opt/hivedemo/data/transaction
hdfs dfs -put ./store_details.csv /opt/hivedemo/data/store
hdfs dfs -put ./store_review.csv /opt/hivedemo/data/review
%hive
--create database shopping;
show databases;
%hive
use shopping;
create external table if not exists ext_customer_details(
customer_id string,
first_name string,
last_name string,
email string,
gender string,
address string,
country string,
language string,
job string,
credit_type string,
credit_no string
)
row format delimited fields terminated by ','
location '/opt/hivedemo/data/customer/'
tblproperties("skip.header.line.count"="1")
%hive
create external table if not exists ext_store_details(
store_id string,
store_name string,
employee_number int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location '/opt/hivedemo/data/store/'
tblproperties("skip.header.line.count"="1")
%hive
create external table if not exists ext_store_review(
transaction_id string,
store_id string,
review_score int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location '/opt/hivedemo/data/review/'
tblproperties("skip.header.line.count"="1")
%hive
--ext_transaction_details
create external table if not exists ext_transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
date string,
time string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location '/opt/hivedemo/data/transaction/'
tblproperties("skip.header.line.count"="1")
%hive
with
t1 as (select *,row_number() over(partition by transaction_id order by 8 asc) as rn from ext_transaction_details) select * from t1 where rn>1
%hive
with
t1 as (select row_number() over(partition by transaction_id order by transaction_id asc) as rn,* from ext_transaction_details) select * from t1 where rn>1
%hive
create view if not exists vw_store_review as
select * from ext_store_review where review_score='';
%hive
create view if not exists vw_customer_details as
select customer_id,
first_name,
unbase64(last_name) last_name,
unbase64(email) email,
gender,
unbase64(address) address,
country,
language,
job,
credit_type,
unbase64(concat(unbase64(credit_no),"kb15")) credit_no
from ext_customer_details
%hive
create table if not exists transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
purchase_time string,
purchase_date date
)
partitioned by (purchase_month string)
%hive
select
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details
%hive
set hive.exec.dynamic.partition.mode=nonstrict
%hive
select from_unixtime(unix_timestamp("2021-11-16",'yyyy-MM-dd'),'yyyy-MM') as purchase_month
%hive
with
t1 as (
select
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details
)
insert overwrite table transaction_details partition(purchase_month)
select
if(rn=1,transaction_id,concat(transaction_id,'_rep',rn)),
customer_id,
store_id,
price,
product,
time,
date,
purchase_month
from t1
最常用的信用卡
%hive
select country, credit_type, count(distinct credit_no) num from vw_customer_details group by country,credit_type order by num desc
|