2023-02-10 clickhouse导入tpch数据-记录

Posted 帝尊悟世

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2023-02-10 clickhouse导入tpch数据-记录相关的知识,希望对你有一定的参考价值。

摘要:

clickhouse导入tpch数据-记录

生成TPCH数据:

参考文章:《2022-07-06 使用tpch大数据量压测mysql》(帝尊悟世的博客,CSDN)

clickhouse导入数据

一. 创建tpch数据库

-- Create the benchmark database; IF NOT EXISTS keeps this step re-runnable.
CREATE DATABASE IF NOT EXISTS tpch;

-- Switch the session to tpch so unqualified table names resolve to it.
USE tpch;

二. 创建表

使用MergeTree引擎


-- nation table; MergeTree data is indexed by the primary key n_nationkey.
CREATE TABLE nation
(
    n_nationkey INTEGER      NOT NULL,
    n_name      CHAR(25)     NOT NULL,
    n_regionkey INTEGER      NOT NULL,
    n_comment   VARCHAR(152),
    PRIMARY KEY (n_nationkey)
)
ENGINE = MergeTree;
 
-- region table; MergeTree data is indexed by the primary key r_regionkey.
CREATE TABLE region
(
    r_regionkey INTEGER      NOT NULL,
    r_name      CHAR(25)     NOT NULL,
    r_comment   VARCHAR(152),
    PRIMARY KEY (r_regionkey)
)
ENGINE = MergeTree;
 
-- part table; MergeTree data is indexed by the primary key p_partkey.
CREATE TABLE part
(
    p_partkey     INTEGER       NOT NULL,
    p_name        VARCHAR(55)   NOT NULL,
    p_mfgr        CHAR(25)      NOT NULL,
    p_brand       CHAR(10)      NOT NULL,
    p_type        VARCHAR(25)   NOT NULL,
    p_size        INTEGER       NOT NULL,
    p_container   CHAR(10)      NOT NULL,
    p_retailprice DECIMAL(15,2) NOT NULL,
    p_comment     VARCHAR(23)   NOT NULL,
    PRIMARY KEY (p_partkey)
)
ENGINE = MergeTree;
 
-- supplier table; MergeTree data is indexed by the primary key s_suppkey.
CREATE TABLE supplier
(
    s_suppkey   INTEGER       NOT NULL,
    s_name      CHAR(25)      NOT NULL,
    s_address   VARCHAR(40)   NOT NULL,
    s_nationkey INTEGER       NOT NULL,
    s_phone     CHAR(15)      NOT NULL,
    s_acctbal   DECIMAL(15,2) NOT NULL,
    s_comment   VARCHAR(101)  NOT NULL,
    PRIMARY KEY (s_suppkey)
)
ENGINE = MergeTree;
 
-- partsupp table; composite primary key (ps_partkey, ps_suppkey).
CREATE TABLE partsupp
(
    ps_partkey    INTEGER       NOT NULL,
    ps_suppkey    INTEGER       NOT NULL,
    ps_availqty   INTEGER       NOT NULL,
    ps_supplycost DECIMAL(15,2) NOT NULL,
    ps_comment    VARCHAR(199)  NOT NULL,
    PRIMARY KEY (ps_partkey, ps_suppkey)
)
ENGINE = MergeTree;
 
-- customer table; MergeTree data is indexed by the primary key c_custkey.
CREATE TABLE customer
(
    c_custkey    INTEGER       NOT NULL,
    c_name       VARCHAR(25)   NOT NULL,
    c_address    VARCHAR(40)   NOT NULL,
    c_nationkey  INTEGER       NOT NULL,
    c_phone      CHAR(15)      NOT NULL,
    c_acctbal    DECIMAL(15,2) NOT NULL,
    c_mktsegment CHAR(10)      NOT NULL,
    c_comment    VARCHAR(117)  NOT NULL,
    PRIMARY KEY (c_custkey)
)
ENGINE = MergeTree;
 
-- orders table; MergeTree data is indexed by the primary key o_orderkey.
CREATE TABLE orders
(
    o_orderkey      INTEGER       NOT NULL,
    o_custkey       INTEGER       NOT NULL,
    o_orderstatus   CHAR(1)       NOT NULL,
    o_totalprice    DECIMAL(15,2) NOT NULL,
    o_orderdate     DATE          NOT NULL,
    o_orderpriority CHAR(15)      NOT NULL,
    o_clerk         CHAR(15)      NOT NULL,
    o_shippriority  INTEGER       NOT NULL,
    o_comment       VARCHAR(79)   NOT NULL,
    PRIMARY KEY (o_orderkey)
)
ENGINE = MergeTree;
 
-- lineitem table; composite primary key (l_orderkey, l_linenumber).
CREATE TABLE lineitem
(
    l_orderkey      INTEGER       NOT NULL,
    l_partkey       INTEGER       NOT NULL,
    l_suppkey       INTEGER       NOT NULL,
    l_linenumber    INTEGER       NOT NULL,
    l_quantity      DECIMAL(15,2) NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount      DECIMAL(15,2) NOT NULL,
    l_tax           DECIMAL(15,2) NOT NULL,
    l_returnflag    CHAR(1)       NOT NULL,
    l_linestatus    CHAR(1)       NOT NULL,
    l_shipdate      DATE          NOT NULL,
    l_commitdate    DATE          NOT NULL,
    l_receiptdate   DATE          NOT NULL,
    l_shipinstruct  CHAR(25)      NOT NULL,
    l_shipmode      CHAR(10)      NOT NULL,
    l_comment       VARCHAR(44)   NOT NULL,
    PRIMARY KEY (l_orderkey, l_linenumber)
)
ENGINE = MergeTree;
 

三. 导入数据(.tbl 文件以 "|" 分隔字段,因此通过 --format_csv_delimiter="|" 指定分隔符)

# Load every ./<table>.tbl file into the same-named table in the tpch database.
# Fields are split on '|' via format_csv_delimiter; the table order matches the
# original one-command-per-table listing.
for tbl in customer lineitem nation orders partsupp part region supplier; do
    clickhouse-client -m --format_csv_delimiter="|" --query="insert into tpch.${tbl} format CSV" < "./${tbl}.tbl"
done

查询测试:

一. 测试查询结果


-- Test query: joins customer -> orders -> lineitem (comma joins, with the
-- equality conditions in WHERE), sums l_quantity per group, and returns the
-- 10 rows with the highest o_totalprice.
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM customer, orders, lineitem
-- Join conditions: customer to orders on custkey, orders to lineitem on orderkey.
WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey)
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
-- Highest total price first; equal prices are ordered by earliest order date.
ORDER BY
    o_totalprice DESC,
    o_orderdate ASC
LIMIT 10

Query id: 0e1124c6-1ace-4c98-9cc3-c433e34aea80

↑ Progress: 1.96 million rows, 23.55 MB (5.45 million rows/s., 65.43 MB/s.)  32%
┌─c_name─────────────┬─c_custkey─┬─o_orderkey─┬─o_orderdate─┬─o_totalprice─┬─sum(l_quantity)─┐
│ Customer#000021433 │     21433 │    1750466 │  1992-11-30 │    555285.16 │             300 │
│ Customer#000128120 │    128120 │    4722021 │  1994-04-07 │    544089.09 │             323 │
│ Customer#000144617 │    144617 │    3043270 │  1997-02-12 │    530604.44 │             317 │
│ Customer#000108931 │    108931 │    4576548 │  1997-12-26 │    525590.57 │             295 │
│ Customer#000013940 │     13940 │    2232932 │  1997-04-13 │    522720.61 │             304 │
│ Customer#000024049 │     24049 │    3586919 │  1992-11-07 │    522644.48 │             299 │
│ Customer#000066790 │     66790 │    2199712 │  1996-09-30 │    515531.82 │             327 │
│ Customer#000051796 │     51796 │    2185667 │  1992-10-08 │    511359.88 │             286 │
│ Customer#000100685 │    100685 │    4515876 │  1993-11-02 │     510061.6 │             293 │
│ Customer#000141100 │    141100 │     972901 │  1992-07-18 │    508668.52 │             293 │
└────────────────────┴───────────┴────────────┴─────────────┴──────────────┴─────────────────┘

10 rows in set. Elapsed: 14.116 sec. Processed 7.65 million rows, 103.66 MB (542.03 thousand rows/s., 7.34 MB/s.)

二. EXPLAIN分析

PLAN:

-- Plain EXPLAIN of the test query (the article labels this output "PLAN"):
-- the result lists the plan steps, from Projection down to the
-- ReadFromMergeTree reads of each table.
EXPLAIN
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM customer, orders, lineitem
WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey)
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice DESC,
    o_orderdate ASC
LIMIT 10

Query id: 62efd73d-c270-4b2d-ba61-a8ec635ff008

┌─explain────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Expression (Projection)                                                                                        │
│   Limit (preliminary LIMIT (without OFFSET))                                                                   │
│     Sorting (Sorting for ORDER BY)                                                                             │
│       Expression (Before ORDER BY)                                                                             │
│         Aggregating                                                                                            │
│           Expression (Before GROUP BY)                                                                         │
│             Filter (WHERE)                                                                                     │
│               Join (JOIN FillRightFirst)                                                                       │
│                 Filter (( + (Before JOIN + (Projection + Before ORDER BY))))                                   │
│                   Filter (WHERE)                                                                               │
│                     Join (JOIN FillRightFirst)                                                                 │
│                       Expression (Before JOIN)                                                                 │
│                         ReadFromMergeTree (tpch.customer)                                                      │
│                       Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) │
│                         ReadFromMergeTree (tpch.orders)                                                        │
│                 Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY))))       │
│                   ReadFromMergeTree (tpch.lineitem)                                                            │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

17 rows in set. Elapsed: 0.114 sec. 

AST:

-- EXPLAIN AST of the same test query: the result is the parsed syntax tree
-- (identifiers, functions, table expressions, literals), one node per row.
EXPLAIN AST
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM customer, orders, lineitem
WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey)
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice DESC,
    o_orderdate ASC
LIMIT 10

Query id: 1ab3db63-983f-4b30-8385-3a32a29f4251

┌─explain─────────────────────────────────────┐
│ SelectWithUnionQuery (children 1)           │
│  ExpressionList (children 1)                │
│   SelectQuery (children 6)                  │
│    ExpressionList (children 6)              │
│     Identifier c_name                       │
│     Identifier c_custkey                    │
│     Identifier o_orderkey                   │
│     Identifier o_orderdate                  │
│     Identifier o_totalprice                 │
│     Function sum (children 1)               │
│      ExpressionList (children 1)            │
│       Identifier l_quantity                 │
│    TablesInSelectQuery (children 3)         │
│     TablesInSelectQueryElement (children 1) │
│      TableExpression (children 1)           │
│       TableIdentifier customer              │
│     TablesInSelectQueryElement (children 2) │
│      TableExpression (children 1)           │
│       TableIdentifier orders                │
│      TableJoin                              │
│     TablesInSelectQueryElement (children 2) │
│      TableExpression (children 1)           │
│       TableIdentifier lineitem              │
│      TableJoin                              │
│    Function and (children 1)                │
│     ExpressionList (children 2)             │
│      Function equals (children 1)           │
│       ExpressionList (children 2)           │
│        Identifier c_custkey                 │
│        Identifier o_custkey                 │
│      Function equals (children 1)           │
│       ExpressionList (children 2)           │
│        Identifier o_orderkey                │
│        Identifier l_orderkey                │
│    ExpressionList (children 5)              │
│     Identifier c_name                       │
│     Identifier c_custkey                    │
│     Identifier o_orderkey                   │
│     Identifier o_orderdate                  │
│     Identifier o_totalprice                 │
│    ExpressionList (children 2)              │
│     OrderByElement (children 1)             │
│      Identifier o_totalprice                │
│     OrderByElement (children 1)             │
│      Identifier o_orderdate                 │
│    Literal UInt64_10                        │
└─────────────────────────────────────────────┘

46 rows in set. Elapsed: 0.010 sec. 

PIPELINE:

-- EXPLAIN PIPELINE of the same test query: the result lists the processing
-- transforms for each plan step, including how many parallel copies of each
-- are used (the "x N" annotations).
EXPLAIN PIPELINE
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM customer, orders, lineitem
WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey)
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice DESC,
    o_orderdate ASC
LIMIT 10

Query id: 536c2a51-ac9c-48b9-a031-c3dba1a1a8d5

┌─explain───────────────────────────────────────────────────────┐
│ (Expression)                                                  │
│ ExpressionTransform                                           │
│   (Limit)                                                     │
│   Limit                                                       │
│     (Sorting)                                                 │
│     MergingSortedTransform 16 → 1                             │
│       MergeSortingTransform × 16                              │
│         LimitsCheckingTransform × 16                          │
│           PartialSortingTransform × 16                        │
│             (Expression)                                      │
│             ExpressionTransform × 16                          │
│               (Aggregating)                                   │
│               Resize 1 → 16                                   │
│                 AggregatingTransform                          │
│                   (Expression)                                │
│                   ExpressionTransform                         │
│                     (Filter)                                  │
│                     FilterTransform                           │
│                       (Join)                                  │
│                       JoiningTransform 2 → 1                  │
│                         (Filter)                              │
│                         FilterTransform                       │
│                           (Filter)                            │
│                           FilterTransform                     │
│                             (Join)                            │
│                             JoiningTransform 2 → 1            │
│                               (Expression)                    │
│                               ExpressionTransform             │
│                                 (ReadFromMergeTree)           │
│                                 MergeTreeInOrder 0 → 1        │
│                               (Expression)                    │
│                               FillingRightJoinSide            │
│                                 Resize 8 → 1                  │
│                                   ExpressionTransform × 8     │
│                                     (ReadFromMergeTree)       │
│                                     MergeTreeThread × 8 0 → 1 │
│                         (Expression)                          │
│                         FillingRightJoinSide                  │
│                           Resize 16 → 1                       │
│                             ExpressionTransform × 16          │
│                               (ReadFromMergeTree)             │
│                               MergeTreeThread × 16 0 → 1      │
└───────────────────────────────────────────────────────────────┘

42 rows in set. Elapsed: 0.168 sec. 

SYNTAX:

-- EXPLAIN SYNTAX of the same test query: the result is the query text after
-- rewriting; in the recorded output the comma joins appear as nested
-- ALL INNER JOIN subqueries.
EXPLAIN SYNTAX
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM customer, orders, lineitem
WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey)
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice DESC,
    o_orderdate ASC
LIMIT 10

Query id: deaf37a6-d685-44d3-8de2-77671d8559a7

┌─explain─────────────────────────────────────────────────────┐
│ SELECT                                                      │
│     c_name,                                                 │
│     c_custkey,                                              │
│     o_orderkey,                                             │
│     o_orderdate,                                            │
│     o_totalprice,                                           │
│     sum(l_quantity)                                         │
│ FROM                                                        │
│ (                                                           │
│     SELECT                                                  │
│         c_custkey,                                          │
│         c_name,                                             │
│         o_custkey,                                          │
│         o_totalprice,                                       │
│         o_orderdate,                                        │
│         o_orderkey                                          │
│     FROM customer                                           │
│     ALL INNER JOIN                                          │
│     (                                                       │
│         SELECT                                              │
│             o_orderkey,                                     │
│             o_orderdate,                                    │
│             o_totalprice,                                   │
│             o_custkey                                       │
│         FROM orders                                         │
│     ) AS orders ON c_custkey = o_custkey                    │
│     WHERE c_custkey = o_custkey                             │
│ ) AS `--.s`                                                 │
│ ALL INNER JOIN                                              │
│ (                                                           │
│     SELECT                                                  │
│         l_quantity,                                         │
│         l_orderkey                                          │
│     FROM lineitem                                           │
│ ) AS lineitem ON o_orderkey = l_orderkey                    │
│ WHERE (c_custkey = o_custkey) AND (o_orderkey = l_orderkey) │
│ GROUP BY                                                    │
│     c_name,                                                 │
│     c_custkey,                                              │
│     o_orderkey,                                             │
│     o_orderdate,                                            │
│     o_totalprice                                            │
│ ORDER BY                                                    │
│     o_totalprice DESC,                                      │
│     o_orderdate ASC                                         │
│ LIMIT 10                                                    │
└─────────────────────────────────────────────────────────────┘

46 rows in set. Elapsed: 0.073 sec. 

以上是关于2023-02-10 clickhouse导入tpch数据-记录的主要内容,如果未能解决你的问题,请参考以下文章

ClickHouse数据导入

clickhouse使用waterdrop将Hive中的数据导入ClickHouse

Python 连接clickhouse数据库以及新建表结构,csv导入数据

clickhouse安装数据导入及查询测试

hive导入到clickhouse的几种方式总结

如何将带有 YYYYMMDD 列的 CSV 文件导入 ClickHouse 中的 DATE 列