IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 大数据 -> 2021.11.16Zeppelin建表查表练习 -> 正文阅读

[大数据]2021.11.16Zeppelin建表查表练习

%sh
mkdir -p /opt/hivedemo/data/
show databases;

%sh
cd /opt/hivedemo/data
ls
echo '---------------------------'
wc -l transaction_details.csv
echo '--------------------------'
wc -l store_review.csv
head -2 store_review.csv
echo '--------------------------'
wc -l store_details.csv
head -6 store_details.csv
echo '---------------------------'
wc -l customer_details.csv
head -3 customer_details.csv

%sh
cd /opt/hivedemo/data
hdfs dfs -mkdir -p /opt/hivedemo/data/customer
hdfs dfs -mkdir -p /opt/hivedemo/data/transaction
hdfs dfs -mkdir -p /opt/hivedemo/data/store
hdfs dfs -mkdir -p /opt/hivedemo/data/review
hdfs dfs -ls /opt/hivedemo/data
hdfs dfs -put ./customer_details.csv /opt/hivedemo/data/customer
hdfs dfs -put ./transaction_details.csv /opt/hivedemo/data/transaction
hdfs dfs -put ./store_details.csv /opt/hivedemo/data/store
hdfs dfs -put ./store_review.csv /opt/hivedemo/data/review

%hive
--create database shopping;
show databases;

%hive
use shopping;
create external table if not exists ext_customer_details(
customer_id string,
first_name string,
last_name string,
email string,
gender string,
address string,
country string,
language string,
job string,
credit_type string,
credit_no string
)
row format delimited fields terminated by ','
location  '/opt/hivedemo/data/customer/'
tblproperties("skip.header.line.count"="1")

%hive
create external table if not exists ext_store_details(
store_id string,
store_name string,
employee_number int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/store/'
tblproperties("skip.header.line.count"="1")

%hive
create external table if not exists ext_store_review(
transaction_id string,
store_id string,
review_score int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/review/'
tblproperties("skip.header.line.count"="1")

%hive
--ext_transaction_details
create external table if not exists ext_transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
date string,
time string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
location  '/opt/hivedemo/data/transaction/'
tblproperties("skip.header.line.count"="1")

%hive
with
t1 as (select *,row_number() over(partition by transaction_id  order by 8 asc) as rn   from ext_transaction_details) select *  from t1 where rn>1

%hive
with
t1 as (select row_number() over(partition by transaction_id  order by transaction_id  asc) as rn,*   from ext_transaction_details) select *  from t1 where rn>1

%hive
create view if not exists vw_store_review as 
select * from ext_store_review where review_score='';

%hive
create view if not exists vw_customer_details as 
select customer_id,
first_name,
unbase64(last_name)  last_name,
unbase64(email)  email,
gender,
unbase64(address)  address,
country,
language,
job,
credit_type,
unbase64(concat(unbase64(credit_no),"kb15")) credit_no
from ext_customer_details

%hive
create table if not exists transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(8,2),
product string,
purchase_time string,
purchase_date date
)
partitioned by (purchase_month string)

%hive
select 
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details

%hive
set hive.exec.dynamic.partition.mode=nonstrict

%hive
select from_unixtime(unix_timestamp("2021-11-16",'yyyy-MM-dd'),'yyyy-MM') as purchase_month

%hive
with
t1 as (
select 
transaction_id,
customer_id,
store_id,
price,
product,
time,
date,
from_unixtime(unix_timestamp(date,'yyyy-MM-dd'),'yyyy-MM') as purchase_month,
row_number() over(partition by transaction_id order by store_id ) as rn
from ext_transaction_details
)
insert overwrite table transaction_details partition(purchase_month)
select 
if(rn=1,transaction_id,concat(transaction_id,'_rep',rn)),
customer_id,
store_id,
price,
product,
time,
date,
purchase_month
from t1

最常用的信用卡
%hive
select country, credit_type, count(distinct credit_no) num from vw_customer_details group by country,credit_type order by num desc

  大数据 最新文章
实现Kafka至少消费一次
亚马逊云科技:还在苦于ETL?Zero ETL的时代
初探MapReduce
【SpringBoot框架篇】32.基于注解+redis实现
Elasticsearch:如何减少 Elasticsearch 集
Go redis操作
Redis面试题
专题五 Redis高并发场景
基于GBase8s和Calcite的多数据源查询
Redis——底层数据结构原理
上一篇文章      下一篇文章      查看所有文章
加:2021-11-17 12:49:38  更:2021-11-17 12:50:37 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/24 6:04:43-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码