1. 環境
CentOS 7.6
2. 安裝前准備
2.1 檢查是否支持 SSE 4.2 指令集
grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
2.2 CentOS 取消文件打開數限制
文件打開數限制
在 /etc/security/limits.conf 文件尾追加
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
# "*" 表示所有用戶都生效;
# "soft"表示應用軟件級別;
# "hard"表示操作系統級別;
# "nofile"表示最大文件打開數
# "nproc"表示最大進程數
用戶進程數限制
vim /etc/security/limits.d/20-nproc.conf
* soft nproc 131072
root soft nproc unlimited
重啟服務器后生效
臨時生效:ulimit -n 65536
2.3 CentOS 取消 SELINUX
vim /etc/selinux/config
SELINUX=disabled
3. 安裝
3.1 單機安裝
3.1.1 下載安裝
yum install clickhouse-server clickhouse-client
3.1.2 啟動 ClickHouse
前台啟動
sudo -u clickhouse clickhouse-server --config-file=/etc/clickhouse-server/config.xml
后台啟動
service clickhouse-server start
或
nohup sudo -u clickhouse clickhouse-server --config-file=/etc/clickhouse-server/config.xml >/dev/null 2>&1 &
# /dev/null 表示空設備文件;可以將 /dev/null 看作 "黑洞",所有寫入它的東西都會丟失
# 0 表示 stdin 標准輸入
# 1 表示 stdout 標准輸出
# 2 表示 stderr 標准錯誤
# 2>&1 表示將標准錯誤重定向到標准輸出。這里表示標准錯誤也會輸出到 /dev/null
3.1.3 client 連接 server
clickhouse-client
3.2 高可用集群
3.2.1 高可用集群架構
ClickHouse集群:
3個節點
1個節點,2個ClickHouse實例
3分片,2副本
- cdh01: 實例1, 端口: tcp 9002, http 8123, 同步端口9009, 類型: 分片1, 副本1
- cdh01: 實例2, 端口: tcp 9003, http 8124, 同步端口9011, 類型: 分片3, 副本2 (cdh03的副本)
- cdh02: 實例1, 端口: tcp 9002, http 8123, 同步端口9009, 類型: 分片2, 副本1
- cdh02: 實例2, 端口: tcp 9003, http 8124, 同步端口9011, 類型: 分片1, 副本2 (cdh01的副本)
- cdh03: 實例1, 端口: tcp 9002, http 8123, 同步端口9009, 類型: 分片3, 副本1
- cdh03: 實例2, 端口: tcp 9003, http 8124, 同步端口9011, 類型: 分片2, 副本2 (cdh02的副本)
3.2.2 高可用集群部署
1. 啟動腳本復制一份
cp /etc/rc.d/init.d/clickhouse-server /etc/rc.d/init.d/clickhouse-server-1
vim /etc/rc.d/init.d/clickhouse-server-1
配置文件使用 config1.xml,pid使用clickhouse-server-1.pid
2.config.xml cdh01,cdh02,cdh03 的實例1
注意端口號 tcp-port 改為 9002,每個節點的 interserver_http_host 不一樣
vim /etc/clickhouse-server/config.xml
<?xml version="1.0"?>
<yandex>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<http_port>8123</http_port>
<tcp_port>9002</tcp_port>
<openSSL>
<server>
<certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
<dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
<verificationMode>none</verificationMode>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
</server>
<client>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<interserver_http_port>9009</interserver_http_port>
<interserver_http_host>cdh03</interserver_http_host>
<listen_host>0.0.0.0</listen_host>
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<max_concurrent_queries>100</max_concurrent_queries>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<path>/var/lib/clickhouse/</path>
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<users_config>users.xml</users_config>
<default_profile>default</default_profile>
<default_database>default</default_database>
<timezone>Asia/Shanghai</timezone>
<mlock_executable>false</mlock_executable>
<remote_servers incl="clickhouse_remote_servers" >
<!--3分片2備份-->
<perftest_3shards_2replicas>
<!--數據分片1-->
<shard>
<!--分片權重,即有多大概率落到此分片上-->
<weight>1</weight>
<!--表示是否只將數據寫入其中一個副本,默認為false,表示寫入所有副本-->
<internal_replication>true</internal_replication>
<replica>
<host>cdh01</host>
<port>9002</port>
<user>default</user>
<password>default</password>
</replica>
<replica>
<host>cdh02</host>
<port>9003</port>
<user>default</user>
<password>default</password>
</replica>
</shard>
<!--數據分片2-->
<shard>
<!--分片權重,即有多大概率落到此分片上-->
<weight>1</weight>
<!--表示是否只將數據寫入其中一個副本,默認為false,表示寫入所有副本-->
<internal_replication>true</internal_replication>
<replica>
<host>cdh02</host>
<port>9002</port>
<user>default</user>
<password>default</password>
</replica>
<replica>
<host>cdh03</host>
<port>9003</port>
<user>default</user>
<password>default</password>
</replica>
</shard>
<!--數據分片3-->
<shard>
<!--分片權重,即有多大概率落到此分片上-->
<weight>1</weight>
<!--表示是否只將數據寫入其中一個副本,默認為false,表示寫入所有副本-->
<internal_replication>true</internal_replication>
<replica>
<host>cdh03</host>
<port>9002</port>
<user>default</user>
<password>default</password>
</replica>
<replica>
<host>cdh01</host>
<port>9003</port>
<user>default</user>
<password>default</password>
</replica>
</shard>
</perftest_3shards_2replicas>
</remote_servers>
<!--設置擴展配置文件的路徑,默認為/etc/metrika.xml-->
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
<zookeeper incl="zookeeper-servers" optional="true" />
<macros incl="macros" optional="true" />
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<max_session_timeout>3600</max_session_timeout>
<default_session_timeout>60</default_session_timeout>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<compression incl="clickhouse_compression"></compression>
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<max_table_size_to_drop>0</max_table_size_to_drop>
<max_partition_size_to_drop>0</max_partition_size_to_drop>
<graphite_rollup_example>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>3600</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup_example>
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
</yandex>
3.config1.xml cdh01,cdh02,cdh03的實例2
注意端口號 tcp_port 改為 9003,http_port 改為 8124,同步端口 改為 9011,每個節點的 interserver_http_host 不一樣
主要修改內容
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server-1/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server-1/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<http_port>8124</http_port>
<!-- 存儲路徑 -->
<path>/var/lib/clickhouse-1/</path>
<user_files_path>/var/lib/clickhouse-1/user_files/</user_files_path>
<!-- 臨時存儲路徑 -->
<tmp_path>/var/lib/clickhouse-1/tmp/</tmp_path>
<tcp_port>9003</tcp_port>
<!-- Port for communication between replicas. Used for data exchange. -->
<interserver_http_port>9011</interserver_http_port>
<!--設置擴展配置文件的路徑,默認為/etc/metrika.xml;實例2使用 metrika1.xml-->
<include_from>/etc/clickhouse-server/metrika1.xml</include_from>
<format_schema_path>/var/lib/clickhouse-1/format_schemas/</format_schema_path>
4.cdh01,cdh02,cdh03
mkdir /var/log/clickhouse-server-1
chown clickhouse:clickhouse /var/log/clickhouse-server-1
5.users.xml 6個實例配置相同
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
</default>
<readonly>
<max_memory_usage>10000000000</max_memory_usage>
<use_uncompressed_cache>0</use_uncompressed_cache>
<load_balancing>random</load_balancing>
<readonly>1</readonly>
</readonly>
</profiles>
<quotas>
<!-- Name of quota. -->
<default>
<interval>
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
<users>
<!--讀寫用戶-->
<default>
<password_sha256_hex>37a8eec1ce19687d132fe29051dca629d164e2c4958ba141d5f4133a33f0688f</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
<!--只讀用戶-->
<ck>
<password_sha256_hex>d93beca6efd0421b314c081066064ac0e371b306f715cc0935b2879e249ba9df</password_sha256_hex>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>readonly</profile>
<quota>default</quota>
</ck>
</users>
</yandex>
如何生成密碼?
密文,sha256sum的Hash值
PASSWORD=default
echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
6.metrika.xml
cdh01,cdh02,cdh03 的實例1
vim /etc/clickhouse-server/metrika.xml
cdh01,cdh02,cdh03 的實例2
vim /etc/clickhouse-server/metrika1.xml
<yandex>
<zookeeper-servers>
<node index="1">
<host>cdh01</host>
<port>2181</port>
</node>
<node index="2">
<host>cdh02</host>
<port>2181</port>
</node>
<node index="3">
<host>cdh03</host>
<port>2181</port>
</node>
</zookeeper-servers>
<!-- layer 是雙級分片設置,我們是單集群,這里是01 shard 表示分片編號; replica 使用了cluster{layer}-{shard}-{replica} -->
<macros>
<layer>01</layer>
<shard>03</shard> <!--表示cluster01集群的03分片下的1號副本-->
<replica>cluster01-03-1</replica>
</macros>
<networks>
<ip>::/0</ip>
</networks>
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
注意:6個節點(除 macros 外)metrika.xml 配置相同
4.集群測試
4.1 集群啟動
啟動6個實例,兩種啟動方式
service clickhouse-server start
service clickhouse-server-1 start
4.2 啟動6個客戶端
clickhouse-client -h cdh01 -m -u default --password default --port 9002
clickhouse-client -h cdh01 -m -u default --password default --port 9003
clickhouse-client -h cdh02 -m -u default --password default --port 9002
clickhouse-client -h cdh02 -m -u default --password default --port 9003
clickhouse-client -h cdh03 -m -u default --password default --port 9002
clickhouse-client -h cdh03 -m -u default --password default --port 9003
4.3 測試案例參考:https://hzkeung.com/2018/06/30/clickhouse-cluster-test
采用 ReplicatedMergeTree + Distributed引擎作為集群結構的引擎
ReplicatedMergeTree(zoo_path, replica_name, partition, primary_key, 8192)
- zoo_path,zk路徑(自動在zookeeper中創建),如果要相互復制,必須一樣
- replica_name,副本名稱,必須不一樣
- partition,分區
- primary_key,含有主鍵相關字段的元組,可以為單獨列
- 8192,索引粒度
Distributed(cluster, database, local_table[, sharding_key])
- cluster,需要寫成在config里自定義的cluster名稱
- database,是分片數據庫的名稱
- local_table,是分片本地表的名稱 -最后一項sharding_key是選填的,可以是一個表達式,例如rand(),也可以是某列 如user_id,不過該列必須是integer類型,通過對該具體的值進行取余進行分片,如果擔心這樣沒法均勻的進行分片,也可以加上hash函數,如intHash64(user_id)
4.4 測試表
create database ontime;
use ontime;
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/ontime','cluster01-01-1',FlightDate,(Year, FlightDate),8192); #cdh01的實例1,分片01,副本1
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-03/ontime','cluster01-03-2',FlightDate,(Year, FlightDate),8192); #cdh01的實例2,分片03,副本2
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-02/ontime','cluster01-02-1',FlightDate,(Year, FlightDate),8192); #cdh02的實例1,分片02,副本1
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/ontime','cluster01-01-2',FlightDate,(Year, FlightDate),8192); #cdh02的實例2,分片01,副本2
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-03/ontime','cluster01-03-1',FlightDate,(Year, FlightDate),8192); #cdh03的實例1,分片03,副本1
CREATE TABLE ontime (FlightDate Date,Year UInt16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-02/ontime','cluster01-02-2',FlightDate,(Year, FlightDate),8192); #cdh03的實例2,分片02,副本2
CREATE TABLE ontime_all (FlightDate Date,Year UInt16) ENGINE= Distributed(perftest_3shards_2replicas, ontime, ontime, rand()); #每個實例都執行
4.5 測試過程
# 在 cdh01 的實例1 (分片01,副本1) 上執行該語句
insert into ontime (FlightDate,Year)values('2001-10-12',2001);
# 在 6個實例上執行該語句
select * from ontime;
可以發現cdh01 實例1(分片01,副本1) 和 cdh02 實例2(分片01,副本2) 兩個節點的本地表查詢到數據;數據成功復制副本,當其中1個副本掛掉后,集群還可繼續使用
# 在 6個實例上執行該語句
select * from ontime_all;
在所有實例上均可查詢到數據;任一節點都可讀取分布表