本文介绍Spark性能测试的场景信息。

创建外表

本次性能测试将在Spark中创建以下八张文本格式的外表。

  • CUSTOMER表
    -- External CUSTOMER table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE customer (
        c_custkey     INTEGER,
        c_name        VARCHAR(25),
        c_address     VARCHAR(40),
        c_nationkey   INTEGER,
        c_phone       CHAR(15),
        c_acctbal     DECIMAL(15, 2),
        c_mktsegment  CHAR(10),
        c_comment     VARCHAR(117)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../customer'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • LINEITEM表
    -- External LINEITEM table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    -- NOTE(review): the columns look like the TPC-H schema; at large scale
    -- factors TPC-H order keys exceed the 32-bit INTEGER range -- confirm
    -- INTEGER is wide enough for the data set actually used.
    CREATE EXTERNAL TABLE lineitem (
        l_orderkey       INTEGER,
        l_partkey        INTEGER,
        l_suppkey        INTEGER,
        l_linenumber     INTEGER,
        l_quantity       DECIMAL(15, 2),
        l_extendedprice  DECIMAL(15, 2),
        l_discount       DECIMAL(15, 2),
        l_tax            DECIMAL(15, 2),
        l_returnflag     CHAR(1),
        l_linestatus     CHAR(1),
        l_shipdate       DATE,
        l_commitdate     DATE,
        l_receiptdate    DATE,
        l_shipinstruct   CHAR(25),
        l_shipmode       CHAR(10),
        l_comment        VARCHAR(44)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../lineitem'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • NATION表
    -- External NATION table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE nation (
        n_nationkey  INTEGER,
        n_name       CHAR(25),
        n_regionkey  INTEGER,
        n_comment    VARCHAR(152)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../nation'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • ORDERS表
    -- External ORDERS table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    -- NOTE(review): the columns look like the TPC-H schema; at large scale
    -- factors TPC-H order keys exceed the 32-bit INTEGER range -- confirm
    -- INTEGER is wide enough for the data set actually used.
    CREATE EXTERNAL TABLE orders (
        o_orderkey       INTEGER,
        o_custkey        INTEGER,
        o_orderstatus    CHAR(1),
        o_totalprice     DECIMAL(15, 2),
        o_orderdate      DATE,
        o_orderpriority  CHAR(15),
        o_clerk          CHAR(15),
        o_shippriority   INTEGER,
        o_comment        VARCHAR(79)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../orders'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • PART表
    -- External PART table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE part (
        p_partkey      INTEGER,
        p_name         VARCHAR(55),
        p_mfgr         CHAR(25),
        p_brand        CHAR(10),
        p_type         VARCHAR(25),
        p_size         INTEGER,
        p_container    CHAR(10),
        p_retailprice  DECIMAL(15, 2),
        p_comment      VARCHAR(23)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../part'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • PARTSUPP表
    -- External PARTSUPP table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE partsupp (
        ps_partkey     INTEGER,
        ps_suppkey     INTEGER,
        ps_availqty    INTEGER,
        ps_supplycost  DECIMAL(15, 2),
        ps_comment     VARCHAR(199)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../partsupp'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • REGION表
    -- External REGION table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE region (
        r_regionkey  INTEGER,
        r_name       CHAR(25),
        r_comment    VARCHAR(152)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../region/'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );
  • SUPPLIER表
    -- External SUPPLIER table over '|'-delimited text files.
    -- The table properties set the NULL marker to the empty string and the
    -- text encoding to latin1.
    -- The '/.../' prefix in LOCATION is presumably a placeholder for the real
    -- data directory -- replace before running.
    CREATE EXTERNAL TABLE supplier (
        s_suppkey    INTEGER,
        s_name       CHAR(25),
        s_address    VARCHAR(40),
        s_nationkey  INTEGER,
        s_phone      CHAR(15),
        s_acctbal    DECIMAL(15, 2),
        s_comment    VARCHAR(101)
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '|'
    LOCATION '/.../supplier/'
    TBLPROPERTIES (
        'serialization.null.format' = '',
        'serialization.encoding'    = 'latin1'
    );

创建内表

本次性能测试将在Spark中创建以下八张Parquet格式（SNAPPY压缩）的内表。

  • CUSTOMER表
    -- Internal CUSTOMER table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE customer (
        c_custkey     INTEGER,
        c_name        VARCHAR(25),
        c_address     VARCHAR(40),
        c_nationkey   INTEGER,
        c_phone       CHAR(15),
        c_acctbal     DECIMAL(15, 2),
        c_mktsegment  CHAR(10),
        c_comment     VARCHAR(117)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • LINEITEM表
    -- Internal LINEITEM table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    -- NOTE(review): the columns look like the TPC-H schema; at large scale
    -- factors TPC-H order keys exceed the 32-bit INTEGER range -- confirm
    -- INTEGER is wide enough for the data set actually used.
    CREATE TABLE lineitem (
        l_orderkey       INTEGER,
        l_partkey        INTEGER,
        l_suppkey        INTEGER,
        l_linenumber     INTEGER,
        l_quantity       DECIMAL(15, 2),
        l_extendedprice  DECIMAL(15, 2),
        l_discount       DECIMAL(15, 2),
        l_tax            DECIMAL(15, 2),
        l_returnflag     CHAR(1),
        l_linestatus     CHAR(1),
        l_shipdate       DATE,
        l_commitdate     DATE,
        l_receiptdate    DATE,
        l_shipinstruct   CHAR(25),
        l_shipmode       CHAR(10),
        l_comment        VARCHAR(44)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • NATION表
    -- Internal NATION table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE nation (
        n_nationkey  INTEGER,
        n_name       CHAR(25),
        n_regionkey  INTEGER,
        n_comment    VARCHAR(152)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • ORDERS表
    -- Internal ORDERS table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    -- NOTE(review): the columns look like the TPC-H schema; at large scale
    -- factors TPC-H order keys exceed the 32-bit INTEGER range -- confirm
    -- INTEGER is wide enough for the data set actually used.
    CREATE TABLE orders (
        o_orderkey       INTEGER,
        o_custkey        INTEGER,
        o_orderstatus    CHAR(1),
        o_totalprice     DECIMAL(15, 2),
        o_orderdate      DATE,
        o_orderpriority  CHAR(15),
        o_clerk          CHAR(15),
        o_shippriority   INTEGER,
        o_comment        VARCHAR(79)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • PART表
    -- Internal PART table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE part (
        p_partkey      INTEGER,
        p_name         VARCHAR(55),
        p_mfgr         CHAR(25),
        p_brand        CHAR(10),
        p_type         VARCHAR(25),
        p_size         INTEGER,
        p_container    CHAR(10),
        p_retailprice  DECIMAL(15, 2),
        p_comment      VARCHAR(23)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • PARTSUPP表
    -- Internal PARTSUPP table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE partsupp (
        ps_partkey     INTEGER,
        ps_suppkey     INTEGER,
        ps_availqty    INTEGER,
        ps_supplycost  DECIMAL(15, 2),
        ps_comment     VARCHAR(199)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • REGION表
    -- Internal REGION table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE region (
        r_regionkey  INTEGER,
        r_name       CHAR(25),
        r_comment    VARCHAR(152)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");
  • SUPPLIER表
    -- Internal SUPPLIER table, stored as Parquet with the
    -- parquet.compression table property set to SNAPPY.
    -- NOTE(review): this name is also used by the external table defined
    -- earlier in this document; presumably the two are created in different
    -- databases -- confirm.
    CREATE TABLE supplier (
        s_suppkey    INTEGER,
        s_name       CHAR(25),
        s_address    VARCHAR(40),
        s_nationkey  INTEGER,
        s_phone      CHAR(15),
        s_acctbal    DECIMAL(15, 2),
        s_comment    VARCHAR(101)
    )
    STORED AS PARQUET
    TBLPROPERTIES ("parquet.compression" = "SNAPPY");