Template appendix: ClickHouse configuration files
clickhouse config.xml
# Edit config.xml according to the supplied parameters: uncomment and fill in
# the <timezone> and <listen_host> elements of the stock config file.
# The "\!" keeps "!" from shell history expansion; sed sees a literal "!".
sudo sed -i "s|<\!-- <timezone>Europe/Moscow</timezone> -->|<timezone>$timezone</timezone>|g" /etc/clickhouse-server/config.xml
sudo sed -i "s|<\!-- <listen_host>0.0.0.0</listen_host> -->|<listen_host>$listen_host</listen_host>|g" /etc/clickhouse-server/config.xml
# Edit users.xml according to the supplied parameters.
# NOTE(review): "9a" appends after line 9 of users.xml — presumably inside the
# <default> profile of the packaged file; fragile if the stock layout changes,
# verify against the installed ClickHouse version. Because each sed run inserts
# at the same line, the three settings end up in reverse order (harmless in XML).
sudo sed -i "9a <max_threads>$max_threads</max_threads>" /etc/clickhouse-server/users.xml
sudo sed -i "9a <max_insert_threads>$max_insert_threads</max_insert_threads>" /etc/clickhouse-server/users.xml
sudo sed -i "9a <distributed_product_mode>$distributed_product_mode</distributed_product_mode>" /etc/clickhouse-server/users.xml
# Replace the stock default values with the parameterized ones.
sudo sed -i "s|<max_memory_usage>10000000000</max_memory_usage>|<max_memory_usage>$max_memory_usage</max_memory_usage>|" /etc/clickhouse-server/users.xml
sudo sed -i "s|<load_balancing>random</load_balancing>|<load_balancing>$load_balancing</load_balancing>|" /etc/clickhouse-server/users.xml
sudo sed -i "s|<password></password>|<password>$password</password>|" /etc/clickhouse-server/users.xml
# Hard-coded: enable access_management so the default user can manage
# users and grants via SQL.
sudo sed -i 's|<!-- <access_management>1</access_management> -->|<access_management>1</access_management>|' /etc/clickhouse-server/users.xml
# Whether to use an S3 disk: this version unconditionally creates the S3
# tiered-storage config file.
# If true, write *config.d/storage.xml*.
# Use `sudo tee` instead of `sudo cat << EOF > file`: with the latter the
# redirection is performed by the unprivileged calling shell and fails for
# paths under /etc when the script is not run as root.
sudo tee /etc/clickhouse-server/config.d/storage.xml > /dev/null << EOF
<yandex>
  <storage_configuration>
    <disks>
      <s3>
        <type>s3</type>
        <!-- NOTE(review): plain http; confirm whether https is required for this endpoint -->
        <endpoint>http://s3.$REGION.amazonaws.com/$endpoint/clickhouse/</endpoint>
        <!-- Use instance-profile/environment credentials instead of embedding keys -->
        <use_environment_credentials>true</use_environment_credentials>
        <max_connections>10000</max_connections>
      </s3>
    </disks>
    <policies>
      <!-- Tiered policy: new parts land on the local default disk, overflow to S3 -->
      <AWStiered>
        <volumes>
          <default>
            <disk>default</disk>
            <max_data_part_size_bytes>$max_data_part_size_bytes</max_data_part_size_bytes>
            <perform_ttl_move_on_insert>0</perform_ttl_move_on_insert>
          </default>
          <s3>
            <disk>s3</disk>
          </s3>
        </volumes>
        <move_factor>$move_factor</move_factor>
      </AWStiered>
      <!-- Policy that stores everything directly on S3 -->
      <s3only>
        <volumes>
          <s3>
            <disk>s3</disk>
          </s3>
        </volumes>
      </s3only>
    </policies>
  </storage_configuration>
</yandex>
EOF
sleep 1
################################## Cluster-mode script #############################
# Build the cluster: edit config.xml according to the parameters.
# NOTE(review): the line-number based edits below (delete 524-629, append after
# 542) assume the stock config.xml layout of one specific ClickHouse release —
# verify before reusing on another version.
sudo sed -i '524,629d' /etc/clickhouse-server/config.xml
sudo sed -i 's|<!--</remote_url_allow_hosts>-->|<include_from>/etc/clickhouse-server/metrika.xml</include_from>|' /etc/clickhouse-server/config.xml
sudo sed -i '542a <remote_servers incl="clickhouse_remote_servers" />' /etc/clickhouse-server/config.xml
sudo sed -i '542a <zookeeper incl="zookeeper-servers" optional="true" />' /etc/clickhouse-server/config.xml
# Create the cluster metrika config; identical on every node.
# The example below is 2 shards x 2 replicas.
# Use `sudo tee` so the write to /etc succeeds for non-root callers
# (a plain `cat > /etc/...` redirection has no elevated privileges).
sudo tee /etc/clickhouse-server/metrika.xml > /dev/null << EOF
<?xml version="1.0"?>
<yandex>
  <!-- ClickHouse cluster nodes -->
  <clickhouse_remote_servers>
    <ch_cluster_demo>
      <!-- shard 1 -->
      <shard>
        <internal_replication>true</internal_replication>
        <replica>
          <host>$CH_Cluster_shard1_replica1</host>
          <port>9000</port>
          <user>default</user>
          <password>$password</password>
        </replica>
        <!-- replica set 1 -->
        <replica>
          <host>$CH_Cluster_shard1_replica2</host>
          <port>9000</port>
          <user>default</user>
          <password>$password</password>
        </replica>
      </shard>
      <!-- shard 2 -->
      <shard>
        <internal_replication>true</internal_replication>
        <replica>
          <host>$CH_Cluster_shard2_replica1</host>
          <port>9000</port>
          <user>default</user>
          <password>$password</password>
        </replica>
        <!-- replica set 2 -->
        <replica>
          <host>$CH_Cluster_shard2_replica2</host>
          <port>9000</port>
          <user>default</user>
          <password>$password</password>
        </replica>
      </shard>
    </ch_cluster_demo>
  </clickhouse_remote_servers>
  <!-- ZooKeeper ensemble -->
  <zookeeper-servers>
    <node index="1">
      <host>$zookeeper1</host>
      <port>2181</port>
    </node>
    <node index="2">
      <host>$zookeeper2</host>
      <port>2181</port>
    </node>
    <node index="3">
      <host>$zookeeper3</host>
      <port>2181</port>
    </node>
  </zookeeper-servers>
  <networks>
    <ip>::/0</ip>
  </networks>
  <!-- Compression settings -->
  <clickhouse_compression>
    <case>
      <min_part_size>10000000000</min_part_size>
      <min_part_size_ratio>0.01</min_part_size_ratio>
      <method>lz4</method>
      <!-- lz4 compresses faster than zstd but uses more disk -->
    </case>
  </clickhouse_compression>
</yandex>
EOF
sleep 1
# Create the per-node macros config; the values differ on every node.
# Use `sudo tee` so the write to /etc succeeds for non-root callers.
sudo tee /etc/clickhouse-server/config.d/macros.xml > /dev/null << EOF
<!-- node1 -->
<yandex>
  <macros>
    <!-- Convention: {cluster name}-{shard}-{replica number} -->
    <replica>$replica</replica>
    <shard>$shard</shard>
    <layer>01</layer>
  </macros>
</yandex>
EOF
sleep 1
Table-creation SQL
-- Replicated fact table for the public "ontime" flight-delay dataset.
-- The ZooKeeper path uses the {layer}/{shard}/{replica} macros written to
-- config.d/macros.xml, so this same DDL can be run unchanged on every node.
CREATE TABLE `ontime_replica`
(
`Year` UInt16,
`Quarter` UInt8,
`Month` UInt8,
`DayofMonth` UInt8,
`DayOfWeek` UInt8,
`FlightDate` Date,
`Reporting_Airline` String,
`DOT_ID_Reporting_Airline` Int32,
`IATA_CODE_Reporting_Airline` String,
`Tail_Number` Int32, -- NOTE(review): tail numbers are alphanumeric in the source data; confirm Int32 matches the loader's preprocessing
`Flight_Number_Reporting_Airline` String,
-- Origin airport attributes
`OriginAirportID` Int32,
`OriginAirportSeqID` Int32,
`OriginCityMarketID` Int32,
`Origin` FixedString(5),
`OriginCityName` String,
`OriginState` FixedString(2),
`OriginStateFips` String,
`OriginStateName` String,
`OriginWac` Int32,
-- Destination airport attributes
`DestAirportID` Int32,
`DestAirportSeqID` Int32,
`DestCityMarketID` Int32,
`Dest` FixedString(5),
`DestCityName` String,
`DestState` FixedString(2),
`DestStateFips` String,
`DestStateName` String,
`DestWac` Int32,
-- Departure / arrival times and delays
`CRSDepTime` Int32,
`DepTime` Int32,
`DepDelay` Int32,
`DepDelayMinutes` Int32,
`DepDel15` Int32,
`DepartureDelayGroups` String,
`DepTimeBlk` String,
`TaxiOut` Int32,
`WheelsOff` Int32,
`WheelsOn` Int32,
`TaxiIn` Int32,
`CRSArrTime` Int32,
`ArrTime` Int32,
`ArrDelay` Int32,
`ArrDelayMinutes` Int32,
`ArrDel15` Int32,
`ArrivalDelayGroups` Int32,
`ArrTimeBlk` String,
`Cancelled` UInt8,
`CancellationCode` FixedString(1),
`Diverted` UInt8,
`CRSElapsedTime` Int32,
`ActualElapsedTime` Int32,
`AirTime` Nullable(Int32),
`Flights` Int32,
`Distance` Int32,
`DistanceGroup` UInt8,
-- Delay-cause breakdown
`CarrierDelay` Int32,
`WeatherDelay` Int32,
`NASDelay` Int32,
`SecurityDelay` Int32,
`LateAircraftDelay` Int32,
`FirstDepTime` String,
`TotalAddGTime` String,
`LongestAddGTime` String,
-- Diversion details (up to 5 diverted landings)
`DivAirportLandings` String,
`DivReachedDest` String,
`DivActualElapsedTime` String,
`DivArrDelay` String,
`DivDistance` String,
`Div1Airport` String,
`Div1AirportID` Int32,
`Div1AirportSeqID` Int32,
`Div1WheelsOn` String,
`Div1TotalGTime` String,
`Div1LongestGTime` String,
`Div1WheelsOff` String,
`Div1TailNum` String,
`Div2Airport` String,
`Div2AirportID` Int32,
`Div2AirportSeqID` Int32,
`Div2WheelsOn` String,
`Div2TotalGTime` String,
`Div2LongestGTime` String,
`Div2WheelsOff` String,
`Div2TailNum` String,
`Div3Airport` String,
`Div3AirportID` Int32,
`Div3AirportSeqID` Int32,
`Div3WheelsOn` String,
`Div3TotalGTime` String,
`Div3LongestGTime` String,
`Div3WheelsOff` String,
`Div3TailNum` String,
`Div4Airport` String,
`Div4AirportID` Int32,
`Div4AirportSeqID` Int32,
`Div4WheelsOn` String,
`Div4TotalGTime` String,
`Div4LongestGTime` String,
`Div4WheelsOff` String,
`Div4TailNum` String,
`Div5Airport` String,
`Div5AirportID` Int32,
`Div5AirportSeqID` Int32,
`Div5WheelsOn` String,
`Div5TotalGTime` String,
`Div5LongestGTime` String,
`Div5WheelsOff` String,
`Div5TailNum` String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/ontime', '{replica}')
-- One partition per calendar year
PARTITION BY Year
ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
-- 'AWStiered' is the local-disk + S3 policy defined in config.d/storage.xml;
-- 8192 is ClickHouse's default index granularity, stated here explicitly.
SETTINGS storage_policy = 'AWStiered', index_granularity = 8192;
-- Cluster-wide view over the per-node ontime_replica tables; rand() spreads
-- inserted rows evenly across the shards of ch_cluster_demo.
CREATE TABLE ontime AS ontime_replica
    ENGINE = Distributed(ch_cluster_demo, default, ontime_replica, rand());
Load the demo data
# Fetch the download script directly to the path the edits below expect;
# the original bare wget saved it to the current directory while sed/chmod
# targeted /home/ec2-user/tools/install/demodata/ — a path mismatch.
sudo wget -O /home/ec2-user/tools/install/demodata/downloaddata.sh https://awspsa-quickstart.s3.amazonaws.com/clickhouse/scripts/downloaddata.sh
sleep 1
# Substitute the date-range placeholders with the requested values.
sudo sed -i "s|ontimefrom|$ontimefrom|" /home/ec2-user/tools/install/demodata/downloaddata.sh
sudo sed -i "s|ontimeto|$ontimeto|" /home/ec2-user/tools/install/demodata/downloaddata.sh
sudo chmod +x /home/ec2-user/tools/install/demodata/downloaddata.sh
# Run by absolute path so it works regardless of the current directory.
sudo /home/ec2-user/tools/install/demodata/downloaddata.sh
sleep 1
# Load the data; the password is a variable. In cluster mode, write into the
# Distributed table. One unzip+insert job per CPU streams the CSVs into ClickHouse.
# NOTE(review): assumes the *.zip archives land in the current directory —
# confirm against downloaddata.sh.
sudo ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --host $CH_Cluster_shard1_replica1 --password $chpassword --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
sleep 1
prometheus.yml
# Write prometheus.yml via `sudo tee`: with `sudo cat << EOF > file` the
# redirection runs in the unprivileged shell and cannot write under /etc.
# YAML nesting restored — the flattened original was not valid YAML.
sudo tee /etc/prometheus/prometheus.yml > /dev/null << EOF
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  external_labels:
    monitor: 'codelab-monitor'
rule_files:
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'chproxy'
    static_configs:
      - targets: ['localhost:9099']
  # Per-node ClickHouse exporter (9116) and node_exporter (9100) targets
  - job_name: 'CH_1_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica1:9116']
  - job_name: 'CH_1_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica1:9100']
  - job_name: 'CH_2_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica2:9116']
  - job_name: 'CH_2_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica2:9100']
  - job_name: 'CH_3_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica1:9116']
  - job_name: 'CH_3_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica1:9100']
  - job_name: 'CH_4_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica2:9116']
  - job_name: 'CH_4_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica2:9100']
EOF
sleep 1
chproxy/config.yml
# /data/chproxy/config.yml
# Read/write splitting: proxy users can be routed either to the Distributed
# table (all nodes) or to per-shard replica groups — flexible routing.
# Start with: ./chproxy -config=/path/to/config.yml
# YAML nesting restored — the flattened original was not valid YAML.
server:
  http:
    listen_addr: ":9092"
    # NOTE(review): 172.0.0.0/8 also covers public addresses; the RFC1918
    # private range is 172.16.0.0/12 — confirm the intended network.
    allowed_networks: ["172.0.0.0/8"]

# Proxy-level users, each mapped onto a backend cluster/user.
users:
  - name: "distributed"
    password: "$distributedpassword"
    to_cluster: "distributed"
    to_user: "default"
  - name: "replica-write"
    password: "$replicapassword"
    to_cluster: "replica-write"
    to_user: "default"

clusters:
  # All four nodes: queries go through the Distributed table.
  - name: "distributed"
    nodes: [
      "$CH_Cluster_shard1_replica1:8123",
      "$CH_Cluster_shard1_replica2:8123",
      "$CH_Cluster_shard2_replica1:8123",
      "$CH_Cluster_shard2_replica2:8123"
    ]
    users:
      - name: "default"
        password: "$chpassword"
  # Replica-aware cluster: each replica group holds one replica of each shard.
  - name: "replica-write"
    replicas:
      - name: "replica1"
        nodes: ["$CH_Cluster_shard1_replica1:8123", "$CH_Cluster_shard2_replica1:8123"]
      - name: "replica2"
        nodes: ["$CH_Cluster_shard1_replica2:8123", "$CH_Cluster_shard2_replica2:8123"]
    users:
      - name: "default"
        password: "$chpassword"

caches:
  - name: "shortterm"
    dir: "/data/chproxy/cache/shortterm"
    max_size: $cachesize
    expire: $cacheexpire