模板附录:Clickhouse 配置文件

clickhouse config.xml



# 按参数编辑配置文件config.xml

sudo sed -i "s|<\!-- <timezone>Europe/Moscow</timezone> -->|<timezone>$timezone</timezone>|g" /etc/clickhouse-server/config.xml
sudo sed -i "s|<\!-- <listen_host>0.0.0.0</listen_host> -->|<listen_host>$listen_host</listen_host>|g" /etc/clickhouse-server/config.xml

# 按参数编辑配置文件users.xml
sudo sed -i "9a <max_threads>$max_threads</max_threads>" /etc/clickhouse-server/users.xml
sudo sed -i "9a <max_insert_threads>$max_insert_threads</max_insert_threads>" /etc/clickhouse-server/users.xml
sudo sed -i "9a <distributed_product_mode>$distributed_product_mode</distributed_product_mode>" /etc/clickhouse-server/users.xml

sudo sed -i "s|<max_memory_usage>10000000000</max_memory_usage>|<max_memory_usage>$max_memory_usage</max_memory_usage>|" /etc/clickhouse-server/users.xml
sudo sed -i "s|<load_balancing>random</load_balancing>|<load_balancing>$load_balancing</load_balancing>|" /etc/clickhouse-server/users.xml
sudo sed -i "s|<password></password>|<password>$password</password>|" /etc/clickhouse-server/users.xml

# 写死,default用户可以用sql管理user权限
sudo sed -i 's|<!-- <access_management>1</access_management> -->|<access_management>1</access_management>|' /etc/clickhouse-server/users.xml

# 判断是否s3disk,当前版本直接创建s3分层存储配置文件
# if true ,编辑  *config.d/storage.xml*
sudo cat << EOF > /etc/clickhouse-server/config.d/storage.xml

<yandex>
  <storage_configuration>
    <disks>
      <s3>
        <type>s3</type>
        <endpoint>http://s3.$REGION.amazonaws.com/$endpoint/clickhouse/</endpoint>
        <use_environment_credentials>true</use_environment_credentials>
        <max_connections>10000</max_connections>
      </s3>
    </disks>
    <policies>
      <AWStiered>
        <volumes>
          <default>
            <disk>default</disk>
            <max_data_part_size_bytes>$max_data_part_size_bytes</max_data_part_size_bytes>
            <perform_ttl_move_on_insert>0</perform_ttl_move_on_insert>
          </default>
          <s3>
            <disk>s3</disk>
          </s3>
        </volumes>
        <move_factor>$move_factor</move_factor>
      </AWStiered>
      <s3only>
        <volumes>
          <s3>
            <disk>s3</disk>
          </s3>
        </volumes>
      </s3only>
    </policies>
  </storage_configuration>
</yandex>
EOF

sleep 1



##################################   集群模式脚本   #############################

# 创建集群,按参数编辑配置文件

sed -i '524, 629d' /etc/clickhouse-server/config.xml
sudo sed -i 's|<!--</remote_url_allow_hosts>-->|<include_from>/etc/clickhouse-server/metrika.xml</include_from>|' /etc/clickhouse-server/config.xml
sudo sed -i '542a <remote_servers incl="clickhouse_remote_servers" />' /etc/clickhouse-server/config.xml
sudo sed -i '542a <zookeeper incl="zookeeper-servers" optional="true" />' /etc/clickhouse-server/config.xml





# 创建集群metrika配置,每个节点一样,以下为2分片2副本的示例

cat << EOF > /etc/clickhouse-server/metrika.xml
<?xml version="1.0"?>
<yandex>
    <!--ck集群节点-->
    <clickhouse_remote_servers>
        <ch_cluster_demo>
            <!--分片1-->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>$CH_Cluster_shard1_replica1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>$password</password>
                </replica>
                <!--复制集1-->
                <replica>
                    <host>$CH_Cluster_shard1_replica2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>$password</password>
                </replica>
            </shard>
            <!--分片2-->
            <shard>
                <internal_replication>true</internal_replication>
                <replica>
                    <host>$CH_Cluster_shard2_replica1</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>$password</password>
                </replica>
                <!--复制集2-->
                <replica>
                    <host>$CH_Cluster_shard2_replica2</host>
                    <port>9000</port>
                    <user>default</user>
                    <password>$password</password>
                </replica>
            </shard>
        </ch_cluster_demo>
    </clickhouse_remote_servers>
    <!--zookeeper相关配置-->
    <zookeeper-servers>
        <node index="1">
            <host>$zookeeper1</host>
            <port>2181</port>
        </node>
        <node index="2">
            <host>$zookeeper2</host>
            <port>2181</port>
        </node>
        <node index="3">
            <host>$zookeeper3</host>
            <port>2181</port>
        </node>
    </zookeeper-servers>
    <networks>
        <ip>::/0</ip>
    </networks>
    <!--压缩相关配置-->
    <clickhouse_compression>
        <case>
            <min_part_size>10000000000</min_part_size>
            <min_part_size_ratio>0.01</min_part_size_ratio>
            <method>lz4</method>
            <!--压缩算法lz4压缩比zstd快, 更占磁盘-->
        </case>
    </clickhouse_compression>
</yandex>
EOF
sleep 1


# 创建节点宏配置macros,每个节点不一样。
cat << EOF > /etc/clickhouse-server/config.d/macros.xml

<!--node1}-->
<yandex>
    <macros>
        <!--规范:{集群名称}-{shard}-{replica序号}-->
        <replica>$replica</replica>
        <shard>$shard</shard>
        <layer>01</layer>
    </macros>
</yandex>
EOF
sleep 1



建表SQL

CREATE TABLE `ontime_replica`
(
    `Year`                            UInt16,
    `Quarter`                         UInt8,
    `Month`                           UInt8,
    `DayofMonth`                      UInt8,
    `DayOfWeek`                       UInt8,
    `FlightDate`                      Date,
    `Reporting_Airline`               String,
    `DOT_ID_Reporting_Airline`        Int32,
    `IATA_CODE_Reporting_Airline`     String,
    `Tail_Number`                     Int32,
    `Flight_Number_Reporting_Airline` String,
    `OriginAirportID`                 Int32,
    `OriginAirportSeqID`              Int32,
    `OriginCityMarketID`              Int32,
    `Origin`                          FixedString(5),
    `OriginCityName`                  String,
    `OriginState`                     FixedString(2),
    `OriginStateFips`                 String,
    `OriginStateName`                 String,
    `OriginWac`                       Int32,
    `DestAirportID`                   Int32,
    `DestAirportSeqID`                Int32,
    `DestCityMarketID`                Int32,
    `Dest`                            FixedString(5),
    `DestCityName`                    String,
    `DestState`                       FixedString(2),
    `DestStateFips`                   String,
    `DestStateName`                   String,
    `DestWac`                         Int32,
    `CRSDepTime`                      Int32,
    `DepTime`                         Int32,
    `DepDelay`                        Int32,
    `DepDelayMinutes`                 Int32,
    `DepDel15`                        Int32,
    `DepartureDelayGroups`            String,
    `DepTimeBlk`                      String,
    `TaxiOut`                         Int32,
    `WheelsOff`                       Int32,
    `WheelsOn`                        Int32,
    `TaxiIn`                          Int32,
    `CRSArrTime`                      Int32,
    `ArrTime`                         Int32,
    `ArrDelay`                        Int32,
    `ArrDelayMinutes`                 Int32,
    `ArrDel15`                        Int32,
    `ArrivalDelayGroups`              Int32,
    `ArrTimeBlk`                      String,
    `Cancelled`                       UInt8,
    `CancellationCode`                FixedString(1),
    `Diverted`                        UInt8,
    `CRSElapsedTime`                  Int32,
    `ActualElapsedTime`               Int32,
    `AirTime`                         Nullable(Int32),
    `Flights`                         Int32,
    `Distance`                        Int32,
    `DistanceGroup`                   UInt8,
    `CarrierDelay`                    Int32,
    `WeatherDelay`                    Int32,
    `NASDelay`                        Int32,
    `SecurityDelay`                   Int32,
    `LateAircraftDelay`               Int32,
    `FirstDepTime`                    String,
    `TotalAddGTime`                   String,
    `LongestAddGTime`                 String,
    `DivAirportLandings`              String,
    `DivReachedDest`                  String,
    `DivActualElapsedTime`            String,
    `DivArrDelay`                     String,
    `DivDistance`                     String,
    `Div1Airport`                     String,
    `Div1AirportID`                   Int32,
    `Div1AirportSeqID`                Int32,
    `Div1WheelsOn`                    String,
    `Div1TotalGTime`                  String,
    `Div1LongestGTime`                String,
    `Div1WheelsOff`                   String,
    `Div1TailNum`                     String,
    `Div2Airport`                     String,
    `Div2AirportID`                   Int32,
    `Div2AirportSeqID`                Int32,
    `Div2WheelsOn`                    String,
    `Div2TotalGTime`                  String,
    `Div2LongestGTime`                String,
    `Div2WheelsOff`                   String,
    `Div2TailNum`                     String,
    `Div3Airport`                     String,
    `Div3AirportID`                   Int32,
    `Div3AirportSeqID`                Int32,
    `Div3WheelsOn`                    String,
    `Div3TotalGTime`                  String,
    `Div3LongestGTime`                String,
    `Div3WheelsOff`                   String,
    `Div3TailNum`                     String,
    `Div4Airport`                     String,
    `Div4AirportID`                   Int32,
    `Div4AirportSeqID`                Int32,
    `Div4WheelsOn`                    String,
    `Div4TotalGTime`                  String,
    `Div4LongestGTime`                String,
    `Div4WheelsOff`                   String,
    `Div4TailNum`                     String,
    `Div5Airport`                     String,
    `Div5AirportID`                   Int32,
    `Div5AirportSeqID`                Int32,
    `Div5WheelsOn`                    String,
    `Div5TotalGTime`                  String,
    `Div5LongestGTime`                String,
    `Div5WheelsOff`                   String,
    `Div5TailNum`                     String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/ontime', '{replica}') 
      PARTITION BY Year
      ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
      SETTINGS storage_policy = 'AWStiered', index_granularity = 8192;

CREATE TABLE ontime as ontime_replica
ENGINE = Distributed(ch_cluster_demo, default, ontime_replica, rand());


加载demo 数据


wget https://awspsa-quickstart.s3.amazonaws.com/clickhouse/scripts/downloaddata.sh
sleep 1

sudo sed -i "s|ontimefrom|$ontimefrom|" /home/ec2-user/tools/install/demodata/downloaddata.sh
sudo sed -i "s|ontimeto|$ontimeto|" /home/ec2-user/tools/install/demodata/downloaddata.sh

sudo chmod +x /home/ec2-user/tools/install/demodata/downloaddata.sh 
sudo ./downloaddata.sh 
sleep 1

# 加载数据 密码是变量,分布式环境中写分布式表
sudo ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --host $CH_Cluster_shard1_replica1 --password $chpassword --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
sleep 1

prometheus.yml


sudo cat << EOF > /etc/prometheus/prometheus.yml
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  external_labels:
      monitor: 'codelab-monitor'
rule_files:
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'chproxy'
    static_configs:
      - targets: ['localhost:9099']
  - job_name: 'CH_1_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica1:9116']
  - job_name: 'CH_1_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica1:9100']
  - job_name: 'CH_2_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica2:9116']
  - job_name: 'CH_2_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard1_replica2:9100']
  - job_name: 'CH_3_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica1:9116']
  - job_name: 'CH_3_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica1:9100']
  - job_name: 'CH_4_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica2:9116']
  - job_name: 'CH_4_node_exporter'
    static_configs:
      - targets: ['$CH_Cluster_shard2_replica2:9100']
EOF
sleep 1


chproxy/config.yml

# /data/chproxy/config.yml
# 实现读写分离,可以自定义读写分布式表,副本表,比较灵活
# ./chproxy -config=/path/to/config.yml

server:
  http:
      listen_addr: ":9092"
      allowed_networks: ["172.0.0.0/8"]

users:
  - name: "distributed"
    password: "$distributedpassword"
    to_cluster: "distributed"
    to_user: "default"
  - name: "replica-write"
    password: "$replicapassword"
    to_cluster: "replica-write"
    to_user: "default"


clusters:
  - name: "distributed"
    nodes: [
      "$CH_Cluster_shard1_replica1:8123",
      "$CH_Cluster_shard1_replica2:8123",
      "$CH_Cluster_shard2_replica1:8123",
      "$CH_Cluster_shard2_replica2:8123"
    ]
    users:
      - name: "default"
        password: "$chpassword"

  - name: "replica-write"
    replicas:
      - name: "replica1"
        nodes: ["$CH_Cluster_shard1_replica1:8123", "$CH_Cluster_shard2_replica1:8123"]
      - name: "replica2"
        nodes: ["$CH_Cluster_shard1_replica2:8123", "$CH_Cluster_shard2_replica2:8123"]
    users:
      - name: "default"
        password: "$chpassword"

caches:
  - name: "shortterm"
    dir: "/data/chproxy/cache/shortterm"
    max_size: $cachesize
    expire: $cacheexpire