副本
zookeeper配置
内部直接修改:直接在config.xml中把zookeeper节点改成自己的主机即可
data:image/s3,"s3://crabby-images/c3ad1/c3ad19f14c1c3805baeaa496012d8b6e35dc25ee" alt="在这里插入图片描述"
外部文件形式 在/etc/clickhouse-server/config.d下创建metrika.xml文件
<?xml version="1.0"?>
<!-- External include file listing the ZooKeeper ensemble used for replication. -->
<yandex>
<!-- Element name referenced by incl="zookeeper-servers" in config.xml. -->
<zookeeper-servers>
<!-- One <node> per ZooKeeper server: host name and client port. -->
<node index="1">
<host>spark01</host>
<port>2181</port>
</node>
<node index="2">
<host>spark02</host>
<port>2181</port>
</node>
<node index="3">
<host>spark03</host>
<port>2181</port>
</node>
</zookeeper-servers>
</yandex>
分发给其他机器 data:image/s3,"s3://crabby-images/09488/094888c0818ff1957fbd653d50449ee7affa6aef" alt="在这里插入图片描述" 在/etc/clickhouse-server/config.xml中添加以下信息:
<!-- Fill <zookeeper> from the <zookeeper-servers> element of the include file. -->
<!-- NOTE(review): optional="true" presumably suppresses the error when the substitution is missing; confirm against the ClickHouse docs. -->
<zookeeper incl="zookeeper-servers" optional="true" />
<!-- Path of the include file that provides the substitution above. -->
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
data:image/s3,"s3://crabby-images/578fc/578fca0f045691137583644656ab1b60b18c73a9" alt="在这里插入图片描述" 分发集群 data:image/s3,"s3://crabby-images/3b920/3b920942579367fccff9b315fee52b8efd230fca" alt="在这里插入图片描述" 到此就配置完成了
测试
副本只能同步数据,不能同步表结构,所以需要在每台机器上分别手动建表
ReplicatedMergeTree 的第一个参数 /clickhouse/table/01/t_order_rep 是该表在 zookeeper 中的路径,其中 01 表示分片编号;第二个参数(例如 rep_102)是副本名称。同一个分片的各个副本使用相同的 zookeeper 路径,但副本名称必须互不相同
分别在三台机子上创建表结构
spark01上创建:
-- Replica 'rep_101' (created on spark01) of the table registered under the
-- ZooKeeper path /clickhouse/table/01/t_order_rep.
create table t_order_rep2
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
engine = ReplicatedMergeTree('/clickhouse/table/01/t_order_rep', 'rep_101')
partition by toYYYYMMDD(create_time)  -- one partition per calendar day
primary key (id)
order by (id, sku_id);
spark02上创建:
-- Replica 'rep_102' (created on spark02) of the table registered under the
-- ZooKeeper path /clickhouse/table/01/t_order_rep.
create table t_order_rep2
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
engine = ReplicatedMergeTree('/clickhouse/table/01/t_order_rep', 'rep_102')
partition by toYYYYMMDD(create_time)  -- one partition per calendar day
primary key (id)
order by (id, sku_id);
spark03上创建:
-- Replica 'rep_103' (created on spark03) of the table registered under the
-- ZooKeeper path /clickhouse/table/01/t_order_rep.
create table t_order_rep2
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
engine = ReplicatedMergeTree('/clickhouse/table/01/t_order_rep', 'rep_103')
partition by toYYYYMMDD(create_time)  -- one partition per calendar day
primary key (id)
order by (id, sku_id);
向spark01中插入数据
-- Sample rows written through the spark01 replica; they are expected to be
-- readable from the other replicas afterwards.
-- The column list is spelled out so the statement does not depend on the
-- physical column order of the table.
insert into t_order_rep2 (id, sku_id, total_amount, create_time) values
    (101, 'sku_001', 1000.00, '2020-06-01 12:00:00'),
    (102, 'sku_002', 2000.00, '2020-06-01 12:00:00'),
    (103, 'sku_004', 2500.00, '2020-06-01 12:00:00'),
    (104, 'sku_002', 2000.00, '2020-06-01 12:00:00'),
    (105, 'sku_003', 600.00, '2020-06-02 12:00:00');
能在spark02,03上查到 data:image/s3,"s3://crabby-images/c1905/c1905f6c76b01362a57e50427a11ed110846c095" alt="在这里插入图片描述"
分片集群
分片就是将一张表的数据分布在不同的节点上,再通过 Distributed 表引擎把数据拼接起来一同使用。 Distributed 表引擎本身不存储数据只是用来管理其他分片
搭建
创建两个分片:第一个分片有两个副本(spark01、spark02),第二个分片只有一个副本(spark03)
在config.d下创建metrika-shard.xml文件
<?xml version="1.0"?>
<!-- Cluster topology, ZooKeeper ensemble and per-machine macros for the sharded setup. -->
<yandex>
<remote_servers>
<clusters> <!-- Cluster name; the SQL refers to it as "clusters" in ON CLUSTER and Distributed(...). -->
<shard> <!-- First shard of the cluster. -->
<internal_replication>true</internal_replication>
<replica> <!-- First replica of this shard. -->
<host>spark01</host>
<port>9000</port>
</replica>
<replica> <!-- Second replica of this shard. -->
<host>spark02</host>
<port>9000</port>
</replica>
</shard>
<shard> <!-- Second shard of the cluster. -->
<internal_replication>true</internal_replication>
<replica> <!-- Only replica of this shard. -->
<host>spark03</host>
<port>9000</port>
</replica>
</shard>
</clusters>
</remote_servers>
<!-- ZooKeeper ensemble: one <node> per server with host name and client port. -->
<zookeeper-servers>
<node index="1">
<host>spark01</host>
<port>2181</port>
</node>
<node index="2">
<host>spark02</host>
<port>2181</port>
</node>
<node index="3">
<host>spark03</host>
<port>2181</port>
</node>
</zookeeper-servers>
<!-- Values substituted for {shard} and {replica} in ReplicatedMergeTree(...) arguments. -->
<macros>
<shard>01</shard> <!-- Shard ID hosted by this machine; differs per machine, so edit it after copying the file. -->
<replica>rep_1_1</replica> <!-- Replica name of this machine; differs per machine, so edit it after copying the file. -->
</macros>
</yandex>
分发给其他机器 data:image/s3,"s3://crabby-images/7b687/7b687df2e104f136f0121659375ffd1c4d27a61c" alt="在这里插入图片描述" 修改spark02上该文件中的macros(shard、replica改成本机对应的值) data:image/s3,"s3://crabby-images/bb35d/bb35da8b21b807e1062fc0b9ce39a7f818afa603" alt="在这里插入图片描述" 修改spark03上该文件中的macros(shard、replica改成本机对应的值) data:image/s3,"s3://crabby-images/b906b/b906b52ec2020d774905a6301eba062b00a7b856" alt="在这里插入图片描述" 在config.xml中将include_from引用的文件名改为metrika-shard.xml data:image/s3,"s3://crabby-images/c0cdb/c0cdbdea71b65ce3d840a77f7cadb07340728055" alt="在这里插入图片描述" 将config.xml分发到集群中的其他机器 data:image/s3,"s3://crabby-images/1a517/1a51744cd4187e9c2a4f3a0309bc2a7608a5cfb4" alt="在这里插入图片描述" 每次修改完config.xml都必须重启一次clickhouse服务
测试
先创建分片表
-- Local (per-shard) replicated table, created on every node of cluster
-- "clusters" in one statement. {shard} and {replica} are filled in from each
-- server's <macros> configuration.
create table st_order_mt on cluster clusters
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/st_order_mt', '{replica}')
partition by toYYYYMMDD(create_time)  -- one partition per calendar day
primary key (id)
order by (id, sku_id);
data:image/s3,"s3://crabby-images/ac306/ac306f3aeba87fb612018792e7edd5f04d9e3cdb" alt="在这里插入图片描述" 再创建 Distributed 分布式表,引擎参数依次为:集群名、库名、本地表名、分片键
-- Distributed table over the local st_order_mt tables.
-- Engine arguments: cluster name, database, local table, sharding key.
create table test2 on cluster clusters
(
    id           UInt32,
    sku_id       String,
    total_amount Decimal(16, 2),
    create_time  Datetime
)
engine = Distributed(clusters, default, st_order_mt, hiveHash(sku_id));
data:image/s3,"s3://crabby-images/25c11/25c11b38578503f1577461fa7e59664cbe8f04be" alt="在这里插入图片描述" 向分布式表中插入数据
-- Sample rows written through the Distributed table; each row is routed to a
-- shard by hiveHash(sku_id).
-- Fix: the first tuple previously ended with ';', which terminated the
-- statement after one row and left the remaining tuples as a syntax error.
-- The column list is spelled out so the statement does not depend on the
-- physical column order of the table.
insert into test2 (id, sku_id, total_amount, create_time) values
    (201, 'sku_001', 1000.00, '2020-06-03 12:00:00'),
    (202, 'sku_002', 2000.00, '2020-06-01 12:00:00'),
    (203, 'sku_004', 2500.00, '2020-06-01 12:00:00'),
    (204, 'sku_002', 2000.00, '2020-06-01 12:00:00'),
    (205, 'sku_003', 600.00, '2020-06-02 12:00:00');
第一个分片表 data:image/s3,"s3://crabby-images/353e2/353e25f221dd1f1b75300087e99ae6b19d38b5be" alt="在这里插入图片描述" 第一个分片表的副本 data:image/s3,"s3://crabby-images/a845a/a845a7562a4921106a67bba083370a06244e4ea3" alt="在这里插入图片描述" 第二个分片表 data:image/s3,"s3://crabby-images/7f2f9/7f2f92cef498fa8b088a2c4e0c5451253812d40e" alt="在这里插入图片描述"
|